Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// Compile-time trace switch: when true, the plugin entry points in this file
// print a "--> name" line to stderr as they are entered.
static const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
39 }
|
matthiasm@0
|
40
|
Chris@35
|
41 Chordino::~Chordino()
|
matthiasm@0
|
42 {
|
Chris@35
|
43 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
44 }
|
matthiasm@0
|
45
|
matthiasm@0
|
46 string
|
Chris@35
|
47 Chordino::getIdentifier() const
|
matthiasm@0
|
48 {
|
Chris@23
|
49 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
50 return "chordino";
|
matthiasm@0
|
51 }
|
matthiasm@0
|
52
|
matthiasm@0
|
53 string
|
Chris@35
|
54 Chordino::getName() const
|
matthiasm@0
|
55 {
|
Chris@23
|
56 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
57 return "Chordino";
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
Chris@35
|
61 Chordino::getDescription() const
|
matthiasm@0
|
62 {
|
Chris@23
|
63 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
65 }
|
matthiasm@0
|
66
|
matthiasm@50
|
67 Chordino::ParameterList
|
matthiasm@50
|
68 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
69 {
|
matthiasm@50
|
70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
71 ParameterList list;
|
matthiasm@50
|
72
|
mail@118
|
73 ParameterDescriptor useNNLSParam;
|
mail@118
|
74 useNNLSParam.identifier = "useNNLS";
|
mail@118
|
75 useNNLSParam.name = "use approximate transcription (NNLS)";
|
mail@118
|
76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
|
mail@118
|
77 useNNLSParam.unit = "";
|
mail@118
|
78 useNNLSParam.minValue = 0.0;
|
mail@118
|
79 useNNLSParam.maxValue = 1.0;
|
mail@118
|
80 useNNLSParam.defaultValue = 1.0;
|
mail@118
|
81 useNNLSParam.isQuantized = true;
|
mail@118
|
82 useNNLSParam.quantizeStep = 1.0;
|
mail@118
|
83 list.push_back(useNNLSParam);
|
matthiasm@50
|
84
|
mail@118
|
85 ParameterDescriptor useHMMParam;
|
mail@118
|
86 useHMMParam.identifier = "useHMM";
|
mail@118
|
87 useHMMParam.name = "HMM (Viterbi decoding)";
|
mail@118
|
88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
mail@118
|
89 useHMMParam.unit = "";
|
mail@118
|
90 useHMMParam.minValue = 0.0;
|
mail@118
|
91 useHMMParam.maxValue = 1.0;
|
mail@118
|
92 useHMMParam.defaultValue = 1.0;
|
mail@118
|
93 useHMMParam.isQuantized = true;
|
mail@118
|
94 useHMMParam.quantizeStep = 1.0;
|
mail@118
|
95 list.push_back(useHMMParam);
|
matthiasm@50
|
96
|
mail@118
|
97 ParameterDescriptor rollonParam;
|
mail@118
|
98 rollonParam.identifier = "rollon";
|
mail@118
|
99 rollonParam.name = "bass noise threshold";
|
mail@118
|
100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
mail@118
|
101 rollonParam.unit = "%";
|
mail@118
|
102 rollonParam.minValue = 0;
|
mail@118
|
103 rollonParam.maxValue = 5;
|
mail@118
|
104 rollonParam.defaultValue = 0.0;
|
mail@118
|
105 rollonParam.isQuantized = true;
|
mail@118
|
106 rollonParam.quantizeStep = 0.5;
|
mail@118
|
107 list.push_back(rollonParam);
|
matthiasm@50
|
108
|
mail@118
|
109 ParameterDescriptor tuningmodeParam;
|
mail@118
|
110 tuningmodeParam.identifier = "tuningmode";
|
mail@118
|
111 tuningmodeParam.name = "tuning mode";
|
mail@118
|
112 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
mail@118
|
113 tuningmodeParam.unit = "";
|
mail@118
|
114 tuningmodeParam.minValue = 0;
|
mail@118
|
115 tuningmodeParam.maxValue = 1;
|
mail@118
|
116 tuningmodeParam.defaultValue = 0.0;
|
mail@118
|
117 tuningmodeParam.isQuantized = true;
|
mail@118
|
118 tuningmodeParam.valueNames.push_back("global tuning");
|
mail@118
|
119 tuningmodeParam.valueNames.push_back("local tuning");
|
mail@118
|
120 tuningmodeParam.quantizeStep = 1.0;
|
mail@118
|
121 list.push_back(tuningmodeParam);
|
matthiasm@50
|
122
|
mail@118
|
123 ParameterDescriptor whiteningParam;
|
mail@118
|
124 whiteningParam.identifier = "whitening";
|
mail@118
|
125 whiteningParam.name = "spectral whitening";
|
mail@118
|
126 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@118
|
127 whiteningParam.unit = "";
|
mail@118
|
128 whiteningParam.isQuantized = true;
|
mail@118
|
129 whiteningParam.minValue = 0.0;
|
mail@118
|
130 whiteningParam.maxValue = 1.0;
|
mail@118
|
131 whiteningParam.defaultValue = 1.0;
|
mail@118
|
132 whiteningParam.isQuantized = false;
|
mail@118
|
133 list.push_back(whiteningParam);
|
matthiasm@50
|
134
|
mail@118
|
135 ParameterDescriptor spectralShapeParam;
|
mail@118
|
136 spectralShapeParam.identifier = "spectralshape";
|
mail@118
|
137 spectralShapeParam.name = "spectral shape";
|
mail@118
|
138 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@118
|
139 spectralShapeParam.unit = "";
|
mail@118
|
140 spectralShapeParam.minValue = 0.5;
|
mail@118
|
141 spectralShapeParam.maxValue = 0.9;
|
mail@118
|
142 spectralShapeParam.defaultValue = 0.7;
|
mail@118
|
143 spectralShapeParam.isQuantized = false;
|
mail@118
|
144 list.push_back(spectralShapeParam);
|
matthiasm@50
|
145
|
mail@118
|
146 ParameterDescriptor boostnParam;
|
mail@118
|
147 boostnParam.identifier = "boostn";
|
mail@118
|
148 boostnParam.name = "boost N";
|
mail@118
|
149 boostnParam.description = "Boost likelihood of the N (no chord) label.";
|
mail@118
|
150 boostnParam.unit = "";
|
mail@118
|
151 boostnParam.minValue = 0.0;
|
mail@118
|
152 boostnParam.maxValue = 1.0;
|
mail@118
|
153 boostnParam.defaultValue = 0.1;
|
mail@118
|
154 boostnParam.isQuantized = false;
|
mail@118
|
155 list.push_back(boostnParam);
|
matthiasm@50
|
156
|
mail@118
|
157 ParameterDescriptor usehartesyntaxParam;
|
mail@118
|
158 usehartesyntaxParam.identifier = "usehartesyntax";
|
mail@118
|
159 usehartesyntaxParam.name = "use Harte syntax";
|
mail@118
|
160 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
|
mail@118
|
161 usehartesyntaxParam.unit = "";
|
mail@118
|
162 usehartesyntaxParam.minValue = 0.0;
|
mail@118
|
163 usehartesyntaxParam.maxValue = 1.0;
|
mail@118
|
164 usehartesyntaxParam.defaultValue = 0.0;
|
mail@118
|
165 usehartesyntaxParam.isQuantized = true;
|
mail@118
|
166 usehartesyntaxParam.quantizeStep = 1.0;
|
mail@118
|
167 usehartesyntaxParam.valueNames.push_back("no");
|
mail@118
|
168 usehartesyntaxParam.valueNames.push_back("yes");
|
mail@118
|
169 list.push_back(usehartesyntaxParam);
|
mail@112
|
170
|
matthiasm@50
|
171 return list;
|
matthiasm@50
|
172 }
|
matthiasm@50
|
173
|
Chris@35
|
174 Chordino::OutputList
|
Chris@35
|
175 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
176 {
|
Chris@23
|
177 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
178 OutputList list;
|
matthiasm@0
|
179
|
Chris@35
|
180 int index = 0;
|
matthiasm@0
|
181
|
matthiasm@0
|
182 OutputDescriptor d7;
|
matthiasm@0
|
183 d7.identifier = "simplechord";
|
Chris@36
|
184 d7.name = "Chord Estimate";
|
matthiasm@58
|
185 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
186 d7.unit = "";
|
matthiasm@0
|
187 d7.hasFixedBinCount = true;
|
matthiasm@0
|
188 d7.binCount = 0;
|
matthiasm@0
|
189 d7.hasKnownExtents = false;
|
matthiasm@0
|
190 d7.isQuantized = false;
|
matthiasm@0
|
191 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
192 d7.hasDuration = false;
|
matthiasm@0
|
193 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
194 list.push_back(d7);
|
Chris@35
|
195 m_outputChords = index++;
|
matthiasm@0
|
196
|
matthiasm@86
|
197 OutputDescriptor chordnotes;
|
matthiasm@86
|
198 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
199 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
200 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
201 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
202 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
203 chordnotes.binCount = 1;
|
matthiasm@86
|
204 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
205 chordnotes.minValue = 0;
|
matthiasm@86
|
206 chordnotes.maxValue = 127;
|
matthiasm@86
|
207 chordnotes.isQuantized = true;
|
matthiasm@86
|
208 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
209 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
210 chordnotes.hasDuration = true;
|
matthiasm@86
|
211 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
212 list.push_back(chordnotes);
|
matthiasm@86
|
213 m_outputChordnotes = index++;
|
matthiasm@86
|
214
|
Chris@23
|
215 OutputDescriptor d8;
|
mail@60
|
216 d8.identifier = "harmonicchange";
|
Chris@36
|
217 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
218 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
219 d8.unit = "";
|
matthiasm@17
|
220 d8.hasFixedBinCount = true;
|
matthiasm@17
|
221 d8.binCount = 1;
|
mail@60
|
222 d8.hasKnownExtents = false;
|
mail@60
|
223 // d8.minValue = 0.0;
|
mail@60
|
224 // d8.maxValue = 0.999;
|
matthiasm@17
|
225 d8.isQuantized = false;
|
matthiasm@17
|
226 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
227 d8.hasDuration = false;
|
matthiasm@17
|
228 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
229 list.push_back(d8);
|
Chris@35
|
230 m_outputHarmonicChange = index++;
|
matthiasm@1
|
231
|
matthiasm@107
|
232 OutputDescriptor loglikelihood;
|
matthiasm@107
|
233 loglikelihood.identifier = "loglikelihood";
|
mail@127
|
234 loglikelihood.name = "Log-Likelihood of Chord Estimate";
|
mail@127
|
235 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
|
matthiasm@107
|
236 loglikelihood.unit = "";
|
matthiasm@107
|
237 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
238 loglikelihood.binCount = 1;
|
matthiasm@107
|
239 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
240 loglikelihood.isQuantized = false;
|
matthiasm@107
|
241 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
242 loglikelihood.hasDuration = false;
|
matthiasm@107
|
243 list.push_back(loglikelihood);
|
matthiasm@107
|
244 m_outputLoglikelihood = index++;
|
matthiasm@106
|
245
|
matthiasm@0
|
246 return list;
|
matthiasm@0
|
247 }
|
matthiasm@0
|
248
|
matthiasm@0
|
249 bool
|
Chris@35
|
250 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
251 {
|
Chris@23
|
252 if (debug_on) {
|
Chris@23
|
253 cerr << "--> initialise";
|
Chris@23
|
254 }
|
mail@76
|
255
|
Chris@35
|
256 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
257 return false;
|
Chris@35
|
258 }
|
mail@115
|
259 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
|
matthiasm@0
|
260 return true;
|
matthiasm@0
|
261 }
|
matthiasm@0
|
262
|
matthiasm@0
|
263 void
|
Chris@35
|
264 Chordino::reset()
|
matthiasm@0
|
265 {
|
Chris@23
|
266 if (debug_on) cerr << "--> reset";
|
Chris@35
|
267 NNLSBase::reset();
|
matthiasm@0
|
268 }
|
matthiasm@0
|
269
|
Chris@35
|
270 Chordino::FeatureSet
|
Chris@35
|
271 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
272 {
|
Chris@23
|
273 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
274
|
Chris@35
|
275 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
276
|
Chris@35
|
277 return FeatureSet();
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
Chris@35
|
280 Chordino::FeatureSet
|
Chris@35
|
281 Chordino::getRemainingFeatures()
|
matthiasm@0
|
282 {
|
mail@89
|
283 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
284 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
285 FeatureSet fsOut;
|
Chris@35
|
286 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
287 int nChord = m_chordnames.size();
|
Chris@23
|
288 //
|
Chris@23
|
289 /** Calculate Tuning
|
Chris@23
|
290 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
291 cumulative mean real and imag values)
|
Chris@23
|
292 **/
|
mail@80
|
293 float meanTuningImag = 0;
|
mail@80
|
294 float meanTuningReal = 0;
|
mail@80
|
295 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
296 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
297 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
298 }
|
Chris@23
|
299 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
300 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
301 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
302 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
303
|
Chris@23
|
304 char buffer0 [50];
|
matthiasm@1
|
305
|
Chris@23
|
306 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
307
|
matthiasm@1
|
308
|
Chris@23
|
309 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
310 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
311 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
312 **/
|
Chris@35
|
313 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
314
|
Chris@23
|
315 int count = 0;
|
matthiasm@1
|
316
|
Chris@35
|
317 FeatureList tunedSpec;
|
matthiasm@43
|
318 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
319
|
matthiasm@43
|
320 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
321
|
Chris@35
|
322 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
323 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
324 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
325 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
326 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
327 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
328 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
329
|
Chris@23
|
330 if (m_tuneLocal) {
|
Chris@23
|
331 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
332 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
333 }
|
matthiasm@1
|
334
|
mail@80
|
335 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
336
|
Chris@91
|
337 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@115
|
338 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
339 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
340 }
|
matthiasm@1
|
341
|
matthiasm@43
|
342 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
343 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
344 vector<float> runningstd;
|
mail@77
|
345 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
346 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
347 }
|
Chris@23
|
348 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
349 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
350 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
351 if (runningstd[i] > 0) {
|
matthiasm@43
|
352 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
353 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
354 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
355 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
356 }
|
matthiasm@43
|
357 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
358 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
359 }
|
Chris@23
|
360 }
|
matthiasm@43
|
361 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
362 count++;
|
Chris@23
|
363 }
|
Chris@23
|
364 cerr << "done." << endl;
|
matthiasm@1
|
365
|
Chris@23
|
366 /** Semitone spectrum and chromagrams
|
Chris@23
|
367 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
368 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
369 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
370 bass and treble stacked onto each other).
|
Chris@23
|
371 **/
|
matthiasm@42
|
372 if (m_useNNLS == 0) {
|
Chris@35
|
373 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
374 } else {
|
Chris@35
|
375 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
376 }
|
matthiasm@13
|
377
|
matthiasm@1
|
378
|
matthiasm@43
|
379 vector<vector<double> > chordogram;
|
Chris@23
|
380 vector<vector<int> > scoreChordogram;
|
Chris@35
|
381 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
382 count = 0;
|
matthiasm@9
|
383
|
Chris@35
|
384 FeatureList chromaList;
|
matthiasm@43
|
385
|
matthiasm@43
|
386
|
Chris@35
|
387
|
Chris@35
|
388 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
389 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
390 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
391
|
matthiasm@43
|
392 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
393 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
394
|
mail@77
|
395 float b[nNote];
|
matthiasm@1
|
396
|
Chris@23
|
397 bool some_b_greater_zero = false;
|
Chris@23
|
398 float sumb = 0;
|
mail@77
|
399 for (int i = 0; i < nNote; i++) {
|
mail@77
|
400 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
401 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
402 sumb += b[i];
|
Chris@23
|
403 if (b[i] > 0) {
|
Chris@23
|
404 some_b_greater_zero = true;
|
Chris@23
|
405 }
|
Chris@23
|
406 }
|
matthiasm@1
|
407
|
Chris@23
|
408 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
409
|
Chris@23
|
410 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
411 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
412 float currval;
|
Chris@91
|
413 int iSemitone = 0;
|
matthiasm@1
|
414
|
Chris@23
|
415 if (some_b_greater_zero) {
|
matthiasm@42
|
416 if (m_useNNLS == 0) {
|
Chris@91
|
417 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
418 currval = 0;
|
mail@81
|
419 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
420 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
421 }
|
Chris@23
|
422 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
423 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
424 iSemitone++;
|
Chris@23
|
425 }
|
matthiasm@1
|
426
|
Chris@23
|
427 } else {
|
Chris@35
|
428 float x[84+1000];
|
Chris@23
|
429 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
430 vector<int> signifIndex;
|
Chris@23
|
431 int index=0;
|
Chris@23
|
432 sumb /= 84.0;
|
Chris@91
|
433 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
434 float currval = 0;
|
mail@81
|
435 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
436 currval += b[iNote + iBPS];
|
mail@81
|
437 }
|
Chris@23
|
438 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
439 index++;
|
Chris@23
|
440 }
|
Chris@35
|
441 float rnorm;
|
Chris@35
|
442 float w[84+1000];
|
Chris@35
|
443 float zz[84+1000];
|
Chris@23
|
444 int indx[84+1000];
|
Chris@23
|
445 int mode;
|
mail@77
|
446 int dictsize = nNote*signifIndex.size();
|
mail@81
|
447 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
448 float *curr_dict = new float[dictsize];
|
Chris@91
|
449 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
450 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
451 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
452 }
|
Chris@23
|
453 }
|
Chris@35
|
454 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
455 delete [] curr_dict;
|
Chris@91
|
456 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
457 // cerr << mode << endl;
|
Chris@23
|
458 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
459 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
460 }
|
Chris@23
|
461 }
|
Chris@23
|
462 }
|
Chris@35
|
463
|
Chris@35
|
464 vector<float> origchroma = chroma;
|
Chris@23
|
465 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
466 currentChromas.values = chroma;
|
Chris@35
|
467
|
Chris@23
|
468 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
469 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
470 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
471 case 0: // should never end up here
|
Chris@23
|
472 break;
|
Chris@23
|
473 case 1:
|
Chris@35
|
474 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
475 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
476 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
477 break;
|
Chris@23
|
478 case 2:
|
Chris@35
|
479 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
480 chromanorm[2] += *it;
|
Chris@23
|
481 }
|
Chris@23
|
482 break;
|
Chris@23
|
483 case 3:
|
Chris@35
|
484 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
485 chromanorm[2] += pow(*it,2);
|
Chris@23
|
486 }
|
Chris@23
|
487 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
488 break;
|
Chris@23
|
489 }
|
Chris@23
|
490 if (chromanorm[2] > 0) {
|
Chris@91
|
491 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
492 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
493 }
|
Chris@23
|
494 }
|
Chris@23
|
495 }
|
Chris@35
|
496
|
mail@125
|
497 if (*max_element(origchroma.begin(), origchroma.end()) == 0) {
|
mail@125
|
498 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@122
|
499 chroma[i] = 1;
|
matthiasm@122
|
500 }
|
mail@125
|
501 }
|
mail@113
|
502
|
matthiasm@43
|
503 chromaList.push_back(currentChromas);
|
Chris@35
|
504
|
Chris@23
|
505 // local chord estimation
|
matthiasm@43
|
506 vector<double> currentChordSalience;
|
matthiasm@43
|
507 double tempchordvalue = 0;
|
matthiasm@43
|
508 double sumchordvalue = 0;
|
matthiasm@9
|
509
|
Chris@23
|
510 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
511 tempchordvalue = 0;
|
Chris@23
|
512 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
513 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
514 }
|
Chris@23
|
515 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
516 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
517 }
|
matthiasm@48
|
518 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
519 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
520 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
521 sumchordvalue+=tempchordvalue;
|
Chris@23
|
522 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
523 }
|
Chris@23
|
524 if (sumchordvalue > 0) {
|
Chris@23
|
525 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
526 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
527 }
|
Chris@23
|
528 } else {
|
Chris@23
|
529 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
530 }
|
Chris@23
|
531 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
532
|
Chris@23
|
533 count++;
|
Chris@23
|
534 }
|
Chris@23
|
535 cerr << "done." << endl;
|
matthiasm@13
|
536
|
matthiasm@86
|
537 vector<Feature> oldnotes;
|
matthiasm@10
|
538
|
matthiasm@50
|
539 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
540 if (m_useHMM == 1.0) {
|
matthiasm@44
|
541 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
542 int oldchord = nChord-1;
|
matthiasm@48
|
543 double selftransprob = 0.99;
|
matthiasm@43
|
544
|
matthiasm@48
|
545 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
546 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
547
|
matthiasm@50
|
548 double *delta;
|
matthiasm@50
|
549 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
550
|
matthiasm@43
|
551 vector<vector<double> > trans;
|
matthiasm@43
|
552 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
553 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
554 temp[iChord] = selftransprob;
|
matthiasm@43
|
555 trans.push_back(temp);
|
matthiasm@43
|
556 }
|
matthiasm@106
|
557 vector<double> scale;
|
matthiasm@106
|
558 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@106
|
559
|
matthiasm@48
|
560
|
matthiasm@48
|
561 Feature chord_feature; // chord estimate
|
matthiasm@48
|
562 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
563 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
564 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
565 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
566
|
mail@60
|
567 chordchange[0] = 0;
|
Chris@91
|
568 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
569 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
570 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
571 // chord
|
matthiasm@43
|
572 Feature chord_feature; // chord estimate
|
matthiasm@43
|
573 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
574 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
575 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
576 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
577 oldchord = chordpath[iFrame];
|
matthiasm@86
|
578 // chord notes
|
Chris@91
|
579 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
580 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
581 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
582 }
|
matthiasm@86
|
583 oldnotes.clear();
|
Chris@91
|
584 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
585 Feature chordnote_feature;
|
matthiasm@86
|
586 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
587 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
588 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
589 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
590 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
591 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
592 }
|
Chris@23
|
593 }
|
matthiasm@50
|
594 /* calculating simple chord change prob */
|
matthiasm@50
|
595 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
596 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
597 }
|
Chris@23
|
598 }
|
matthiasm@43
|
599
|
matthiasm@106
|
600 float logscale = 0;
|
matthiasm@106
|
601 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@106
|
602 logscale -= log(scale[iFrame]);
|
matthiasm@106
|
603 Feature loglikelihood;
|
matthiasm@106
|
604 loglikelihood.hasTimestamp = true;
|
matthiasm@106
|
605 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@106
|
606 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@106
|
607 // cerr << chordchange[iFrame] << endl;
|
matthiasm@107
|
608 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@106
|
609 }
|
matthiasm@106
|
610 logscale /= nFrame;
|
mail@111
|
611 // cerr << "loglik" << logscale << endl;
|
matthiasm@106
|
612
|
matthiasm@106
|
613
|
matthiasm@43
|
614 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
615 } else {
|
matthiasm@43
|
616 /* Simple chord estimation
|
matthiasm@43
|
617 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
618 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
619 */
|
matthiasm@44
|
620 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
621 count = 0;
|
matthiasm@43
|
622 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
623 vector<int> chordSequence;
|
matthiasm@43
|
624 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
625 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
626 scoreChordogram.push_back(temp);
|
matthiasm@43
|
627 }
|
matthiasm@43
|
628 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
629 int startIndex = count + 1;
|
matthiasm@43
|
630 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
631
|
matthiasm@43
|
632 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
633
|
matthiasm@43
|
634 vector<int> chordCandidates;
|
Chris@91
|
635 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
636 // float currsum = 0;
|
Chris@91
|
637 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
638 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
639 // }
|
matthiasm@43
|
640 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
641 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
642 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
643 chordCandidates.push_back(iChord);
|
matthiasm@43
|
644 break;
|
matthiasm@43
|
645 }
|
Chris@23
|
646 }
|
Chris@23
|
647 }
|
matthiasm@43
|
648 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
649 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
650
|
matthiasm@43
|
651 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
652 float maxindex = 0; //... and the index thereof
|
Chris@91
|
653 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
654 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
655
|
matthiasm@43
|
656 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
657 // now find the max values on both sides of iWF
|
matthiasm@43
|
658 // left side:
|
matthiasm@43
|
659 float maxL = 0;
|
Chris@91
|
660 int maxindL = nChord-1;
|
Chris@91
|
661 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
662 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
663 float currsum = 0;
|
Chris@91
|
664 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
665 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
666 }
|
matthiasm@43
|
667 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
668 if (currsum > maxL) {
|
matthiasm@43
|
669 maxL = currsum;
|
matthiasm@43
|
670 maxindL = iChord;
|
matthiasm@43
|
671 }
|
matthiasm@43
|
672 }
|
matthiasm@43
|
673 // right side:
|
matthiasm@43
|
674 float maxR = 0;
|
Chris@91
|
675 int maxindR = nChord-1;
|
Chris@91
|
676 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
677 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
678 float currsum = 0;
|
Chris@91
|
679 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
680 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
681 }
|
matthiasm@43
|
682 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
683 if (currsum > maxR) {
|
matthiasm@43
|
684 maxR = currsum;
|
matthiasm@43
|
685 maxindR = iChord;
|
matthiasm@43
|
686 }
|
matthiasm@43
|
687 }
|
matthiasm@43
|
688 if (maxL+maxR > maxval) {
|
matthiasm@43
|
689 maxval = maxL+maxR;
|
matthiasm@43
|
690 maxindex = iWF;
|
matthiasm@43
|
691 bestchordL = maxindL;
|
matthiasm@43
|
692 bestchordR = maxindR;
|
matthiasm@43
|
693 }
|
matthiasm@43
|
694
|
Chris@23
|
695 }
|
matthiasm@43
|
696 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
697 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
698 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
699 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
700 }
|
Chris@91
|
701 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
702 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
703 }
|
matthiasm@50
|
704 if (bestchordL != bestchordR) {
|
matthiasm@50
|
705 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
706 }
|
matthiasm@43
|
707 count++;
|
Chris@23
|
708 }
|
matthiasm@43
|
709 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
710 count = 0;
|
matthiasm@43
|
711 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
712 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
713 float maxindex = 0; //... and the index thereof
|
Chris@91
|
714 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
715 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
716 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
717 maxindex = iChord;
|
matthiasm@43
|
718 // cerr << iChord << endl;
|
matthiasm@43
|
719 }
|
matthiasm@43
|
720 }
|
matthiasm@43
|
721 chordSequence.push_back(maxindex);
|
matthiasm@43
|
722 count++;
|
Chris@23
|
723 }
|
matthiasm@43
|
724
|
matthiasm@43
|
725
|
matthiasm@43
|
726 // mode filter on chordSequence
|
matthiasm@43
|
727 count = 0;
|
matthiasm@43
|
728 string oldChord = "";
|
matthiasm@43
|
729 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
730 Feature chord_feature; // chord estimate
|
matthiasm@43
|
731 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
732 chord_feature.timestamp = *it;
|
matthiasm@43
|
733 // Feature currentChord; // chord estimate
|
matthiasm@43
|
734 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
735 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
736
|
matthiasm@43
|
737 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
738 int maxChordCount = 0;
|
matthiasm@43
|
739 int maxChordIndex = nChord-1;
|
matthiasm@43
|
740 string maxChord;
|
matthiasm@43
|
741 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
742 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
743 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
744 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
745 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
746 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
747 maxChordCount++;
|
matthiasm@43
|
748 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
749 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
750 }
|
matthiasm@43
|
751 }
|
matthiasm@43
|
752 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
753 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
754 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
755 if (oldChord != maxChord) {
|
matthiasm@43
|
756 oldChord = maxChord;
|
matthiasm@43
|
757 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
758 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
759 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
760 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
761 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
762 }
|
matthiasm@86
|
763 oldnotes.clear();
|
Chris@91
|
764 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
765 Feature chordnote_feature;
|
matthiasm@86
|
766 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
767 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
768 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
769 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
770 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
771 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
772 }
|
matthiasm@43
|
773 }
|
matthiasm@43
|
774 count++;
|
Chris@23
|
775 }
|
Chris@23
|
776 }
|
matthiasm@43
|
777 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
778 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
779 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
780 chord_feature.label = "N";
|
mail@60
|
781 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
782
|
Chris@91
|
783 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
784 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
785 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
786 }
|
matthiasm@86
|
787
|
Chris@23
|
788 cerr << "done." << endl;
|
matthiasm@50
|
789
|
matthiasm@50
|
790 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
791 Feature chordchange_feature;
|
matthiasm@50
|
792 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
793 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
794 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
795 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
796 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
797 }
|
matthiasm@50
|
798
|
mail@60
|
799 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
800
|
matthiasm@50
|
801
|
Chris@23
|
802 return fsOut;
|
matthiasm@0
|
803 }
|