Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// Compile-time switch: when true, each plugin method writes a
// "--> <method name>" trace line to cerr on entry.
static const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@86
|
39 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@86
|
40
|
matthiasm@0
|
41 }
|
matthiasm@0
|
42
|
Chris@35
|
43 Chordino::~Chordino()
|
matthiasm@0
|
44 {
|
Chris@35
|
45 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
46 }
|
matthiasm@0
|
47
|
matthiasm@0
|
48 string
|
Chris@35
|
49 Chordino::getIdentifier() const
|
matthiasm@0
|
50 {
|
Chris@23
|
51 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
52 return "chordino";
|
matthiasm@0
|
53 }
|
matthiasm@0
|
54
|
matthiasm@0
|
55 string
|
Chris@35
|
56 Chordino::getName() const
|
matthiasm@0
|
57 {
|
Chris@23
|
58 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
59 return "Chordino";
|
matthiasm@0
|
60 }
|
matthiasm@0
|
61
|
matthiasm@0
|
62 string
|
Chris@35
|
63 Chordino::getDescription() const
|
matthiasm@0
|
64 {
|
Chris@23
|
65 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
67 }
|
matthiasm@0
|
68
|
matthiasm@50
|
69 Chordino::ParameterList
|
matthiasm@50
|
70 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
71 {
|
matthiasm@50
|
72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
73 ParameterList list;
|
matthiasm@50
|
74
|
matthiasm@50
|
75 ParameterDescriptor d;
|
matthiasm@50
|
76 d.identifier = "useNNLS";
|
matthiasm@50
|
77 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
78 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
79 d.unit = "";
|
matthiasm@50
|
80 d.minValue = 0.0;
|
matthiasm@50
|
81 d.maxValue = 1.0;
|
matthiasm@50
|
82 d.defaultValue = 1.0;
|
matthiasm@50
|
83 d.isQuantized = true;
|
matthiasm@50
|
84 d.quantizeStep = 1.0;
|
matthiasm@50
|
85 list.push_back(d);
|
matthiasm@50
|
86
|
matthiasm@50
|
87 ParameterDescriptor d4;
|
matthiasm@50
|
88 d4.identifier = "useHMM";
|
matthiasm@53
|
89 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
91 d4.unit = "";
|
matthiasm@50
|
92 d4.minValue = 0.0;
|
matthiasm@50
|
93 d4.maxValue = 1.0;
|
matthiasm@50
|
94 d4.defaultValue = 1.0;
|
matthiasm@50
|
95 d4.isQuantized = true;
|
matthiasm@50
|
96 d4.quantizeStep = 1.0;
|
matthiasm@50
|
97 list.push_back(d4);
|
matthiasm@50
|
98
|
matthiasm@50
|
99 ParameterDescriptor d0;
|
matthiasm@50
|
100 d0.identifier = "rollon";
|
matthiasm@50
|
101 d0.name = "spectral roll-on";
|
matthiasm@58
|
102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
103 d0.unit = "%";
|
matthiasm@50
|
104 d0.minValue = 0;
|
mail@76
|
105 d0.maxValue = 5;
|
matthiasm@92
|
106 d0.defaultValue = 0.0;
|
matthiasm@50
|
107 d0.isQuantized = true;
|
mail@76
|
108 d0.quantizeStep = 0.5;
|
matthiasm@50
|
109 list.push_back(d0);
|
matthiasm@50
|
110
|
matthiasm@50
|
111 ParameterDescriptor d1;
|
matthiasm@50
|
112 d1.identifier = "tuningmode";
|
matthiasm@50
|
113 d1.name = "tuning mode";
|
matthiasm@50
|
114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
115 d1.unit = "";
|
matthiasm@50
|
116 d1.minValue = 0;
|
matthiasm@50
|
117 d1.maxValue = 1;
|
matthiasm@92
|
118 d1.defaultValue = 0.0;
|
matthiasm@50
|
119 d1.isQuantized = true;
|
matthiasm@50
|
120 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
121 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
122 d1.quantizeStep = 1.0;
|
matthiasm@50
|
123 list.push_back(d1);
|
matthiasm@50
|
124
|
matthiasm@50
|
125 ParameterDescriptor d2;
|
matthiasm@50
|
126 d2.identifier = "whitening";
|
matthiasm@50
|
127 d2.name = "spectral whitening";
|
matthiasm@50
|
128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
129 d2.unit = "";
|
matthiasm@50
|
130 d2.isQuantized = true;
|
matthiasm@50
|
131 d2.minValue = 0.0;
|
matthiasm@50
|
132 d2.maxValue = 1.0;
|
matthiasm@50
|
133 d2.defaultValue = 1.0;
|
matthiasm@50
|
134 d2.isQuantized = false;
|
matthiasm@50
|
135 list.push_back(d2);
|
matthiasm@50
|
136
|
matthiasm@50
|
137 ParameterDescriptor d3;
|
matthiasm@50
|
138 d3.identifier = "s";
|
matthiasm@50
|
139 d3.name = "spectral shape";
|
matthiasm@50
|
140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
141 d3.unit = "";
|
matthiasm@50
|
142 d3.minValue = 0.5;
|
matthiasm@50
|
143 d3.maxValue = 0.9;
|
matthiasm@50
|
144 d3.defaultValue = 0.7;
|
matthiasm@50
|
145 d3.isQuantized = false;
|
matthiasm@50
|
146 list.push_back(d3);
|
matthiasm@50
|
147
|
mail@89
|
148 ParameterDescriptor boostn;
|
mail@89
|
149 boostn.identifier = "boostn";
|
mail@89
|
150 boostn.name = "boost N";
|
matthiasm@95
|
151 boostn.description = "Boost likelihood of the N (no chord) label.";
|
mail@89
|
152 boostn.unit = "";
|
matthiasm@95
|
153 boostn.minValue = 0.0;
|
matthiasm@95
|
154 boostn.maxValue = 1.0;
|
matthiasm@95
|
155 boostn.defaultValue = 0.1;
|
mail@89
|
156 boostn.isQuantized = false;
|
mail@89
|
157 list.push_back(boostn);
|
matthiasm@50
|
158
|
mail@112
|
159 ParameterDescriptor usehartesyntax;
|
mail@112
|
160 usehartesyntax.identifier = "usehartesyntax";
|
mail@112
|
161 usehartesyntax.name = "use Harte syntax";
|
mail@112
|
162 usehartesyntax.description = "Use the chord syntax proposed by Harte";
|
mail@112
|
163 usehartesyntax.unit = "";
|
mail@112
|
164 usehartesyntax.minValue = 0.0;
|
mail@112
|
165 usehartesyntax.maxValue = 1.0;
|
mail@112
|
166 usehartesyntax.defaultValue = 0.0;
|
mail@112
|
167 usehartesyntax.isQuantized = true;
|
mail@112
|
168 usehartesyntax.quantizeStep = 1.0;
|
mail@112
|
169 usehartesyntax.valueNames.push_back("no");
|
mail@112
|
170 usehartesyntax.valueNames.push_back("yes");
|
mail@112
|
171 list.push_back(usehartesyntax);
|
mail@112
|
172
|
matthiasm@50
|
173 return list;
|
matthiasm@50
|
174 }
|
matthiasm@50
|
175
|
Chris@35
|
176 Chordino::OutputList
|
Chris@35
|
177 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
178 {
|
Chris@23
|
179 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
180 OutputList list;
|
matthiasm@0
|
181
|
Chris@35
|
182 int index = 0;
|
matthiasm@0
|
183
|
matthiasm@0
|
184 OutputDescriptor d7;
|
matthiasm@0
|
185 d7.identifier = "simplechord";
|
Chris@36
|
186 d7.name = "Chord Estimate";
|
matthiasm@58
|
187 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
188 d7.unit = "";
|
matthiasm@0
|
189 d7.hasFixedBinCount = true;
|
matthiasm@0
|
190 d7.binCount = 0;
|
matthiasm@0
|
191 d7.hasKnownExtents = false;
|
matthiasm@0
|
192 d7.isQuantized = false;
|
matthiasm@0
|
193 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
194 d7.hasDuration = false;
|
matthiasm@0
|
195 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
196 list.push_back(d7);
|
Chris@35
|
197 m_outputChords = index++;
|
matthiasm@0
|
198
|
matthiasm@86
|
199 OutputDescriptor chordnotes;
|
matthiasm@86
|
200 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
201 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
202 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
203 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
204 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
205 chordnotes.binCount = 1;
|
matthiasm@86
|
206 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
207 chordnotes.minValue = 0;
|
matthiasm@86
|
208 chordnotes.maxValue = 127;
|
matthiasm@86
|
209 chordnotes.isQuantized = true;
|
matthiasm@86
|
210 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
211 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
212 chordnotes.hasDuration = true;
|
matthiasm@86
|
213 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
214 list.push_back(chordnotes);
|
matthiasm@86
|
215 m_outputChordnotes = index++;
|
matthiasm@86
|
216
|
Chris@23
|
217 OutputDescriptor d8;
|
mail@60
|
218 d8.identifier = "harmonicchange";
|
Chris@36
|
219 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
220 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
221 d8.unit = "";
|
matthiasm@17
|
222 d8.hasFixedBinCount = true;
|
matthiasm@17
|
223 d8.binCount = 1;
|
mail@60
|
224 d8.hasKnownExtents = false;
|
mail@60
|
225 // d8.minValue = 0.0;
|
mail@60
|
226 // d8.maxValue = 0.999;
|
matthiasm@17
|
227 d8.isQuantized = false;
|
matthiasm@17
|
228 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
229 d8.hasDuration = false;
|
matthiasm@17
|
230 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
231 list.push_back(d8);
|
Chris@35
|
232 m_outputHarmonicChange = index++;
|
matthiasm@1
|
233
|
matthiasm@107
|
234 OutputDescriptor loglikelihood;
|
matthiasm@107
|
235 loglikelihood.identifier = "loglikelihood";
|
matthiasm@107
|
236 loglikelihood.name = "chord estimate log-likelihood";
|
matthiasm@107
|
237 loglikelihood.description = ".";
|
matthiasm@107
|
238 loglikelihood.unit = "";
|
matthiasm@107
|
239 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
240 loglikelihood.binCount = 1;
|
matthiasm@107
|
241 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
242 loglikelihood.isQuantized = false;
|
matthiasm@107
|
243 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
244 loglikelihood.hasDuration = false;
|
matthiasm@107
|
245 // loglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@107
|
246 list.push_back(loglikelihood);
|
matthiasm@107
|
247 m_outputLoglikelihood = index++;
|
matthiasm@106
|
248
|
matthiasm@0
|
249 return list;
|
matthiasm@0
|
250 }
|
matthiasm@0
|
251
|
matthiasm@0
|
252 bool
|
Chris@35
|
253 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
254 {
|
Chris@23
|
255 if (debug_on) {
|
Chris@23
|
256 cerr << "--> initialise";
|
Chris@23
|
257 }
|
mail@76
|
258
|
Chris@35
|
259 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
260 return false;
|
Chris@35
|
261 }
|
mail@112
|
262 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_useHarte);
|
matthiasm@0
|
263 return true;
|
matthiasm@0
|
264 }
|
matthiasm@0
|
265
|
matthiasm@0
|
266 void
|
Chris@35
|
267 Chordino::reset()
|
matthiasm@0
|
268 {
|
Chris@23
|
269 if (debug_on) cerr << "--> reset";
|
Chris@35
|
270 NNLSBase::reset();
|
matthiasm@0
|
271 }
|
matthiasm@0
|
272
|
Chris@35
|
273 Chordino::FeatureSet
|
Chris@35
|
274 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
275 {
|
Chris@23
|
276 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
277
|
Chris@35
|
278 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
279
|
Chris@35
|
280 return FeatureSet();
|
matthiasm@0
|
281 }
|
matthiasm@0
|
282
|
Chris@35
|
283 Chordino::FeatureSet
|
Chris@35
|
284 Chordino::getRemainingFeatures()
|
matthiasm@0
|
285 {
|
mail@89
|
286 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
287 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
288 FeatureSet fsOut;
|
Chris@35
|
289 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
290 int nChord = m_chordnames.size();
|
Chris@23
|
291 //
|
Chris@23
|
292 /** Calculate Tuning
|
Chris@23
|
293 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
294 cumulative mean real and imag values)
|
Chris@23
|
295 **/
|
mail@80
|
296 float meanTuningImag = 0;
|
mail@80
|
297 float meanTuningReal = 0;
|
mail@80
|
298 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
299 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
300 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
301 }
|
Chris@23
|
302 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
303 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
304 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
305 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
306
|
Chris@23
|
307 char buffer0 [50];
|
matthiasm@1
|
308
|
Chris@23
|
309 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
310
|
matthiasm@1
|
311
|
Chris@23
|
312 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
313 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
314 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
315 **/
|
Chris@35
|
316 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
317
|
Chris@23
|
318 float tempValue = 0;
|
Chris@23
|
319 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
320 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
321 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
322 int count = 0;
|
matthiasm@1
|
323
|
Chris@35
|
324 FeatureList tunedSpec;
|
matthiasm@43
|
325 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
326
|
matthiasm@43
|
327 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
328
|
Chris@35
|
329 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
330 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
331 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
332 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
333 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
334 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
335 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
336
|
Chris@23
|
337 if (m_tuneLocal) {
|
Chris@23
|
338 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
339 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
340 }
|
matthiasm@1
|
341
|
mail@80
|
342 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
343
|
Chris@91
|
344 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@91
|
345 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
346 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
347 }
|
matthiasm@1
|
348
|
matthiasm@43
|
349 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
350 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
351 vector<float> runningstd;
|
mail@77
|
352 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
353 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
354 }
|
Chris@23
|
355 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
356 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
357 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
358 if (runningstd[i] > 0) {
|
matthiasm@43
|
359 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
360 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
361 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
362 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
363 }
|
matthiasm@43
|
364 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
365 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
366 }
|
Chris@23
|
367 }
|
matthiasm@43
|
368 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
369 count++;
|
Chris@23
|
370 }
|
Chris@23
|
371 cerr << "done." << endl;
|
matthiasm@1
|
372
|
Chris@23
|
373 /** Semitone spectrum and chromagrams
|
Chris@23
|
374 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
375 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
376 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
377 bass and treble stacked onto each other).
|
Chris@23
|
378 **/
|
matthiasm@42
|
379 if (m_useNNLS == 0) {
|
Chris@35
|
380 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
381 } else {
|
Chris@35
|
382 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
383 }
|
matthiasm@13
|
384
|
matthiasm@1
|
385
|
matthiasm@43
|
386 vector<vector<double> > chordogram;
|
Chris@23
|
387 vector<vector<int> > scoreChordogram;
|
Chris@35
|
388 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
389 count = 0;
|
matthiasm@9
|
390
|
Chris@35
|
391 FeatureList chromaList;
|
matthiasm@43
|
392
|
matthiasm@43
|
393
|
Chris@35
|
394
|
Chris@35
|
395 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
396 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
397 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
398
|
matthiasm@43
|
399 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
400 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
401
|
mail@77
|
402 float b[nNote];
|
matthiasm@1
|
403
|
Chris@23
|
404 bool some_b_greater_zero = false;
|
Chris@23
|
405 float sumb = 0;
|
mail@77
|
406 for (int i = 0; i < nNote; i++) {
|
mail@77
|
407 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
408 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
409 sumb += b[i];
|
Chris@23
|
410 if (b[i] > 0) {
|
Chris@23
|
411 some_b_greater_zero = true;
|
Chris@23
|
412 }
|
Chris@23
|
413 }
|
matthiasm@1
|
414
|
Chris@23
|
415 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
416
|
Chris@23
|
417 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
418 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
419 float currval;
|
Chris@91
|
420 int iSemitone = 0;
|
matthiasm@1
|
421
|
Chris@23
|
422 if (some_b_greater_zero) {
|
matthiasm@42
|
423 if (m_useNNLS == 0) {
|
Chris@91
|
424 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
425 currval = 0;
|
mail@81
|
426 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
427 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
428 }
|
Chris@23
|
429 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
430 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
431 iSemitone++;
|
Chris@23
|
432 }
|
matthiasm@1
|
433
|
Chris@23
|
434 } else {
|
Chris@35
|
435 float x[84+1000];
|
Chris@23
|
436 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
437 vector<int> signifIndex;
|
Chris@23
|
438 int index=0;
|
Chris@23
|
439 sumb /= 84.0;
|
Chris@91
|
440 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
441 float currval = 0;
|
mail@81
|
442 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
443 currval += b[iNote + iBPS];
|
mail@81
|
444 }
|
Chris@23
|
445 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
446 index++;
|
Chris@23
|
447 }
|
Chris@35
|
448 float rnorm;
|
Chris@35
|
449 float w[84+1000];
|
Chris@35
|
450 float zz[84+1000];
|
Chris@23
|
451 int indx[84+1000];
|
Chris@23
|
452 int mode;
|
mail@77
|
453 int dictsize = nNote*signifIndex.size();
|
mail@81
|
454 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
455 float *curr_dict = new float[dictsize];
|
Chris@91
|
456 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
457 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
458 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
459 }
|
Chris@23
|
460 }
|
Chris@35
|
461 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
462 delete [] curr_dict;
|
Chris@91
|
463 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
464 // cerr << mode << endl;
|
Chris@23
|
465 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
466 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
467 }
|
Chris@23
|
468 }
|
Chris@23
|
469 }
|
Chris@35
|
470
|
Chris@35
|
471 vector<float> origchroma = chroma;
|
Chris@23
|
472 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
473 currentChromas.values = chroma;
|
Chris@35
|
474
|
Chris@23
|
475 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
476 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
477 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
478 case 0: // should never end up here
|
Chris@23
|
479 break;
|
Chris@23
|
480 case 1:
|
Chris@35
|
481 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
482 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
483 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
484 break;
|
Chris@23
|
485 case 2:
|
Chris@35
|
486 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
487 chromanorm[2] += *it;
|
Chris@23
|
488 }
|
Chris@23
|
489 break;
|
Chris@23
|
490 case 3:
|
Chris@35
|
491 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
492 chromanorm[2] += pow(*it,2);
|
Chris@23
|
493 }
|
Chris@23
|
494 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
495 break;
|
Chris@23
|
496 }
|
Chris@23
|
497 if (chromanorm[2] > 0) {
|
Chris@91
|
498 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
499 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
500 }
|
Chris@23
|
501 }
|
Chris@23
|
502 }
|
Chris@35
|
503
|
mail@113
|
504 if (*max_element(origchroma.begin(), origchroma.end()) == 0) {
|
mail@113
|
505 for (int i = 0; i < (int)chroma.size(); i++) {
|
mail@113
|
506 chroma[i] = 1;
|
mail@113
|
507 }
|
mail@113
|
508 }
|
mail@113
|
509
|
matthiasm@43
|
510 chromaList.push_back(currentChromas);
|
Chris@35
|
511
|
Chris@23
|
512 // local chord estimation
|
matthiasm@43
|
513 vector<double> currentChordSalience;
|
matthiasm@43
|
514 double tempchordvalue = 0;
|
matthiasm@43
|
515 double sumchordvalue = 0;
|
matthiasm@9
|
516
|
Chris@23
|
517 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
518 tempchordvalue = 0;
|
Chris@23
|
519 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
520 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
521 }
|
Chris@23
|
522 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
523 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
524 }
|
matthiasm@48
|
525 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
526 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
527 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
528 sumchordvalue+=tempchordvalue;
|
Chris@23
|
529 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
530 }
|
Chris@23
|
531 if (sumchordvalue > 0) {
|
Chris@23
|
532 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
533 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
534 }
|
Chris@23
|
535 } else {
|
Chris@23
|
536 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
537 }
|
Chris@23
|
538 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
539
|
Chris@23
|
540 count++;
|
Chris@23
|
541 }
|
Chris@23
|
542 cerr << "done." << endl;
|
matthiasm@13
|
543
|
matthiasm@86
|
544 vector<Feature> oldnotes;
|
matthiasm@10
|
545
|
matthiasm@50
|
546 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
547 if (m_useHMM == 1.0) {
|
matthiasm@44
|
548 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
549 int oldchord = nChord-1;
|
matthiasm@48
|
550 double selftransprob = 0.99;
|
matthiasm@43
|
551
|
matthiasm@48
|
552 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
553 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
554
|
matthiasm@50
|
555 double *delta;
|
matthiasm@50
|
556 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
557
|
matthiasm@43
|
558 vector<vector<double> > trans;
|
matthiasm@43
|
559 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
560 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
561 temp[iChord] = selftransprob;
|
matthiasm@43
|
562 trans.push_back(temp);
|
matthiasm@43
|
563 }
|
matthiasm@106
|
564 vector<double> scale;
|
matthiasm@106
|
565 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@106
|
566
|
matthiasm@48
|
567
|
matthiasm@48
|
568 Feature chord_feature; // chord estimate
|
matthiasm@48
|
569 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
570 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
571 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
572 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
573
|
mail@60
|
574 chordchange[0] = 0;
|
Chris@91
|
575 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
576 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
577 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
578 // chord
|
matthiasm@43
|
579 Feature chord_feature; // chord estimate
|
matthiasm@43
|
580 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
581 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
582 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
583 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
584 oldchord = chordpath[iFrame];
|
matthiasm@86
|
585 // chord notes
|
Chris@91
|
586 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
587 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
588 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
589 }
|
matthiasm@86
|
590 oldnotes.clear();
|
Chris@91
|
591 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
592 Feature chordnote_feature;
|
matthiasm@86
|
593 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
594 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
595 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
596 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
597 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
598 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
599 }
|
Chris@23
|
600 }
|
matthiasm@50
|
601 /* calculating simple chord change prob */
|
matthiasm@50
|
602 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
603 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
604 }
|
Chris@23
|
605 }
|
matthiasm@43
|
606
|
matthiasm@106
|
607 float logscale = 0;
|
matthiasm@106
|
608 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@106
|
609 logscale -= log(scale[iFrame]);
|
matthiasm@106
|
610 Feature loglikelihood;
|
matthiasm@106
|
611 loglikelihood.hasTimestamp = true;
|
matthiasm@106
|
612 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@106
|
613 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@106
|
614 // cerr << chordchange[iFrame] << endl;
|
matthiasm@107
|
615 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@106
|
616 }
|
matthiasm@106
|
617 logscale /= nFrame;
|
mail@111
|
618 // cerr << "loglik" << logscale << endl;
|
matthiasm@106
|
619
|
matthiasm@106
|
620
|
matthiasm@43
|
621 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
622 } else {
|
matthiasm@43
|
623 /* Simple chord estimation
|
matthiasm@43
|
624 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
625 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
626 */
|
matthiasm@44
|
627 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
628 count = 0;
|
matthiasm@43
|
629 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
630 vector<int> chordSequence;
|
matthiasm@43
|
631 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
632 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
633 scoreChordogram.push_back(temp);
|
matthiasm@43
|
634 }
|
matthiasm@43
|
635 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
636 int startIndex = count + 1;
|
matthiasm@43
|
637 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
638
|
matthiasm@43
|
639 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
640
|
matthiasm@43
|
641 vector<int> chordCandidates;
|
Chris@91
|
642 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
643 // float currsum = 0;
|
Chris@91
|
644 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
645 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
646 // }
|
matthiasm@43
|
647 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
648 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
649 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
650 chordCandidates.push_back(iChord);
|
matthiasm@43
|
651 break;
|
matthiasm@43
|
652 }
|
Chris@23
|
653 }
|
Chris@23
|
654 }
|
matthiasm@43
|
655 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
656 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
657
|
matthiasm@43
|
658 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
659 float maxindex = 0; //... and the index thereof
|
Chris@91
|
660 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
661 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
662
|
matthiasm@43
|
663 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
664 // now find the max values on both sides of iWF
|
matthiasm@43
|
665 // left side:
|
matthiasm@43
|
666 float maxL = 0;
|
Chris@91
|
667 int maxindL = nChord-1;
|
Chris@91
|
668 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
669 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
670 float currsum = 0;
|
Chris@91
|
671 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
672 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
673 }
|
matthiasm@43
|
674 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
675 if (currsum > maxL) {
|
matthiasm@43
|
676 maxL = currsum;
|
matthiasm@43
|
677 maxindL = iChord;
|
matthiasm@43
|
678 }
|
matthiasm@43
|
679 }
|
matthiasm@43
|
680 // right side:
|
matthiasm@43
|
681 float maxR = 0;
|
Chris@91
|
682 int maxindR = nChord-1;
|
Chris@91
|
683 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
684 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
685 float currsum = 0;
|
Chris@91
|
686 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
687 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
688 }
|
matthiasm@43
|
689 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
690 if (currsum > maxR) {
|
matthiasm@43
|
691 maxR = currsum;
|
matthiasm@43
|
692 maxindR = iChord;
|
matthiasm@43
|
693 }
|
matthiasm@43
|
694 }
|
matthiasm@43
|
695 if (maxL+maxR > maxval) {
|
matthiasm@43
|
696 maxval = maxL+maxR;
|
matthiasm@43
|
697 maxindex = iWF;
|
matthiasm@43
|
698 bestchordL = maxindL;
|
matthiasm@43
|
699 bestchordR = maxindR;
|
matthiasm@43
|
700 }
|
matthiasm@43
|
701
|
Chris@23
|
702 }
|
matthiasm@43
|
703 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
704 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
705 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
706 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
707 }
|
Chris@91
|
708 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
709 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
710 }
|
matthiasm@50
|
711 if (bestchordL != bestchordR) {
|
matthiasm@50
|
712 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
713 }
|
matthiasm@43
|
714 count++;
|
Chris@23
|
715 }
|
matthiasm@43
|
716 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
717 count = 0;
|
matthiasm@43
|
718 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
719 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
720 float maxindex = 0; //... and the index thereof
|
Chris@91
|
721 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
722 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
723 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
724 maxindex = iChord;
|
matthiasm@43
|
725 // cerr << iChord << endl;
|
matthiasm@43
|
726 }
|
matthiasm@43
|
727 }
|
matthiasm@43
|
728 chordSequence.push_back(maxindex);
|
matthiasm@43
|
729 count++;
|
Chris@23
|
730 }
|
matthiasm@43
|
731
|
matthiasm@43
|
732
|
matthiasm@43
|
733 // mode filter on chordSequence
|
matthiasm@43
|
734 count = 0;
|
matthiasm@43
|
735 string oldChord = "";
|
matthiasm@43
|
736 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
737 Feature chord_feature; // chord estimate
|
matthiasm@43
|
738 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
739 chord_feature.timestamp = *it;
|
matthiasm@43
|
740 // Feature currentChord; // chord estimate
|
matthiasm@43
|
741 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
742 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
743
|
matthiasm@43
|
744 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
745 int maxChordCount = 0;
|
matthiasm@43
|
746 int maxChordIndex = nChord-1;
|
matthiasm@43
|
747 string maxChord;
|
matthiasm@43
|
748 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
749 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
750 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
751 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
752 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
753 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
754 maxChordCount++;
|
matthiasm@43
|
755 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
756 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
757 }
|
matthiasm@43
|
758 }
|
matthiasm@43
|
759 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
760 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
761 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
762 if (oldChord != maxChord) {
|
matthiasm@43
|
763 oldChord = maxChord;
|
matthiasm@43
|
764 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
765 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
766 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
767 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
768 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
769 }
|
matthiasm@86
|
770 oldnotes.clear();
|
Chris@91
|
771 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
772 Feature chordnote_feature;
|
matthiasm@86
|
773 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
774 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
775 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
776 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
777 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
778 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
779 }
|
matthiasm@43
|
780 }
|
matthiasm@43
|
781 count++;
|
Chris@23
|
782 }
|
Chris@23
|
783 }
|
matthiasm@43
|
784 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
785 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
786 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
787 chord_feature.label = "N";
|
mail@60
|
788 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
789
|
Chris@91
|
790 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
791 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
792 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
793 }
|
matthiasm@86
|
794
|
Chris@23
|
795 cerr << "done." << endl;
|
matthiasm@50
|
796
|
matthiasm@50
|
797 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
798 Feature chordchange_feature;
|
matthiasm@50
|
799 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
800 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
801 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
802 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
803 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
804 }
|
matthiasm@50
|
805
|
mail@60
|
806 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
807
|
matthiasm@50
|
808
|
Chris@23
|
809 return fsOut;
|
matthiasm@0
|
810 }
|