Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// Compile-time switch for the "--> ..." trace messages that the methods
// in this file write to cerr; while it is false none of them are printed.
static const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
39 }
|
matthiasm@0
|
40
|
Chris@35
|
41 Chordino::~Chordino()
|
matthiasm@0
|
42 {
|
Chris@35
|
43 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
44 }
|
matthiasm@0
|
45
|
matthiasm@0
|
46 string
|
Chris@35
|
47 Chordino::getIdentifier() const
|
matthiasm@0
|
48 {
|
Chris@23
|
49 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
50 return "chordino";
|
matthiasm@0
|
51 }
|
matthiasm@0
|
52
|
matthiasm@0
|
53 string
|
Chris@35
|
54 Chordino::getName() const
|
matthiasm@0
|
55 {
|
Chris@23
|
56 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
57 return "Chordino";
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
Chris@35
|
61 Chordino::getDescription() const
|
matthiasm@0
|
62 {
|
Chris@23
|
63 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
65 }
|
matthiasm@0
|
66
|
matthiasm@50
|
67 Chordino::ParameterList
|
matthiasm@50
|
68 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
69 {
|
matthiasm@50
|
70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
71 ParameterList list;
|
matthiasm@50
|
72
|
mail@118
|
73 ParameterDescriptor useNNLSParam;
|
mail@118
|
74 useNNLSParam.identifier = "useNNLS";
|
mail@118
|
75 useNNLSParam.name = "use approximate transcription (NNLS)";
|
mail@118
|
76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
|
mail@118
|
77 useNNLSParam.unit = "";
|
mail@118
|
78 useNNLSParam.minValue = 0.0;
|
mail@118
|
79 useNNLSParam.maxValue = 1.0;
|
mail@118
|
80 useNNLSParam.defaultValue = 1.0;
|
mail@118
|
81 useNNLSParam.isQuantized = true;
|
mail@118
|
82 useNNLSParam.quantizeStep = 1.0;
|
mail@118
|
83 list.push_back(useNNLSParam);
|
matthiasm@50
|
84
|
mail@118
|
85 ParameterDescriptor useHMMParam;
|
mail@118
|
86 useHMMParam.identifier = "useHMM";
|
mail@118
|
87 useHMMParam.name = "HMM (Viterbi decoding)";
|
mail@118
|
88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
mail@118
|
89 useHMMParam.unit = "";
|
mail@118
|
90 useHMMParam.minValue = 0.0;
|
mail@118
|
91 useHMMParam.maxValue = 1.0;
|
mail@118
|
92 useHMMParam.defaultValue = 1.0;
|
mail@118
|
93 useHMMParam.isQuantized = true;
|
mail@118
|
94 useHMMParam.quantizeStep = 1.0;
|
mail@118
|
95 list.push_back(useHMMParam);
|
matthiasm@50
|
96
|
mail@118
|
97 ParameterDescriptor rollonParam;
|
mail@118
|
98 rollonParam.identifier = "rollon";
|
mail@118
|
99 rollonParam.name = "bass noise threshold";
|
mail@118
|
100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
mail@118
|
101 rollonParam.unit = "%";
|
mail@118
|
102 rollonParam.minValue = 0;
|
mail@118
|
103 rollonParam.maxValue = 5;
|
mail@118
|
104 rollonParam.defaultValue = 0.0;
|
mail@118
|
105 rollonParam.isQuantized = true;
|
mail@118
|
106 rollonParam.quantizeStep = 0.5;
|
mail@118
|
107 list.push_back(rollonParam);
|
matthiasm@50
|
108
|
mail@118
|
109 ParameterDescriptor tuningmodeParam;
|
mail@118
|
110 tuningmodeParam.identifier = "tuningmode";
|
mail@118
|
111 tuningmodeParam.name = "tuning mode";
|
mail@118
|
112 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
mail@118
|
113 tuningmodeParam.unit = "";
|
mail@118
|
114 tuningmodeParam.minValue = 0;
|
mail@118
|
115 tuningmodeParam.maxValue = 1;
|
mail@118
|
116 tuningmodeParam.defaultValue = 0.0;
|
mail@118
|
117 tuningmodeParam.isQuantized = true;
|
mail@118
|
118 tuningmodeParam.valueNames.push_back("global tuning");
|
mail@118
|
119 tuningmodeParam.valueNames.push_back("local tuning");
|
mail@118
|
120 tuningmodeParam.quantizeStep = 1.0;
|
mail@118
|
121 list.push_back(tuningmodeParam);
|
matthiasm@50
|
122
|
mail@118
|
123 ParameterDescriptor whiteningParam;
|
mail@118
|
124 whiteningParam.identifier = "whitening";
|
mail@118
|
125 whiteningParam.name = "spectral whitening";
|
mail@118
|
126 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@118
|
127 whiteningParam.unit = "";
|
mail@118
|
128 whiteningParam.isQuantized = true;
|
mail@118
|
129 whiteningParam.minValue = 0.0;
|
mail@118
|
130 whiteningParam.maxValue = 1.0;
|
mail@118
|
131 whiteningParam.defaultValue = 1.0;
|
mail@118
|
132 whiteningParam.isQuantized = false;
|
mail@118
|
133 list.push_back(whiteningParam);
|
matthiasm@50
|
134
|
mail@118
|
135 ParameterDescriptor spectralShapeParam;
|
mail@118
|
136 spectralShapeParam.identifier = "spectralshape";
|
mail@118
|
137 spectralShapeParam.name = "spectral shape";
|
mail@118
|
138 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@118
|
139 spectralShapeParam.unit = "";
|
mail@118
|
140 spectralShapeParam.minValue = 0.5;
|
mail@118
|
141 spectralShapeParam.maxValue = 0.9;
|
mail@118
|
142 spectralShapeParam.defaultValue = 0.7;
|
mail@118
|
143 spectralShapeParam.isQuantized = false;
|
mail@118
|
144 list.push_back(spectralShapeParam);
|
matthiasm@50
|
145
|
mail@118
|
146 ParameterDescriptor boostnParam;
|
mail@118
|
147 boostnParam.identifier = "boostn";
|
mail@118
|
148 boostnParam.name = "boost N";
|
mail@118
|
149 boostnParam.description = "Boost likelihood of the N (no chord) label.";
|
mail@118
|
150 boostnParam.unit = "";
|
mail@118
|
151 boostnParam.minValue = 0.0;
|
mail@118
|
152 boostnParam.maxValue = 1.0;
|
mail@118
|
153 boostnParam.defaultValue = 0.1;
|
mail@118
|
154 boostnParam.isQuantized = false;
|
mail@118
|
155 list.push_back(boostnParam);
|
matthiasm@50
|
156
|
mail@118
|
157 ParameterDescriptor usehartesyntaxParam;
|
mail@118
|
158 usehartesyntaxParam.identifier = "usehartesyntax";
|
mail@118
|
159 usehartesyntaxParam.name = "use Harte syntax";
|
mail@118
|
160 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
|
mail@118
|
161 usehartesyntaxParam.unit = "";
|
mail@118
|
162 usehartesyntaxParam.minValue = 0.0;
|
mail@118
|
163 usehartesyntaxParam.maxValue = 1.0;
|
mail@118
|
164 usehartesyntaxParam.defaultValue = 0.0;
|
mail@118
|
165 usehartesyntaxParam.isQuantized = true;
|
mail@118
|
166 usehartesyntaxParam.quantizeStep = 1.0;
|
mail@118
|
167 usehartesyntaxParam.valueNames.push_back("no");
|
mail@118
|
168 usehartesyntaxParam.valueNames.push_back("yes");
|
mail@118
|
169 list.push_back(usehartesyntaxParam);
|
mail@112
|
170
|
matthiasm@50
|
171 return list;
|
matthiasm@50
|
172 }
|
matthiasm@50
|
173
|
Chris@35
|
174 Chordino::OutputList
|
Chris@35
|
175 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
176 {
|
Chris@23
|
177 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
178 OutputList list;
|
matthiasm@0
|
179
|
Chris@35
|
180 int index = 0;
|
matthiasm@0
|
181
|
matthiasm@0
|
182 OutputDescriptor d7;
|
matthiasm@0
|
183 d7.identifier = "simplechord";
|
Chris@36
|
184 d7.name = "Chord Estimate";
|
matthiasm@58
|
185 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
186 d7.unit = "";
|
matthiasm@0
|
187 d7.hasFixedBinCount = true;
|
matthiasm@0
|
188 d7.binCount = 0;
|
matthiasm@0
|
189 d7.hasKnownExtents = false;
|
matthiasm@0
|
190 d7.isQuantized = false;
|
matthiasm@0
|
191 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
192 d7.hasDuration = false;
|
matthiasm@0
|
193 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
194 list.push_back(d7);
|
Chris@35
|
195 m_outputChords = index++;
|
matthiasm@0
|
196
|
matthiasm@86
|
197 OutputDescriptor chordnotes;
|
matthiasm@86
|
198 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
199 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
200 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
201 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
202 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
203 chordnotes.binCount = 1;
|
matthiasm@86
|
204 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
205 chordnotes.minValue = 0;
|
matthiasm@86
|
206 chordnotes.maxValue = 127;
|
matthiasm@86
|
207 chordnotes.isQuantized = true;
|
matthiasm@86
|
208 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
209 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
210 chordnotes.hasDuration = true;
|
matthiasm@86
|
211 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
212 list.push_back(chordnotes);
|
matthiasm@86
|
213 m_outputChordnotes = index++;
|
matthiasm@86
|
214
|
Chris@23
|
215 OutputDescriptor d8;
|
mail@60
|
216 d8.identifier = "harmonicchange";
|
Chris@36
|
217 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
218 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
219 d8.unit = "";
|
matthiasm@17
|
220 d8.hasFixedBinCount = true;
|
matthiasm@17
|
221 d8.binCount = 1;
|
mail@60
|
222 d8.hasKnownExtents = false;
|
matthiasm@17
|
223 d8.isQuantized = false;
|
matthiasm@17
|
224 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
225 d8.hasDuration = false;
|
matthiasm@17
|
226 list.push_back(d8);
|
Chris@35
|
227 m_outputHarmonicChange = index++;
|
matthiasm@1
|
228
|
matthiasm@107
|
229 OutputDescriptor loglikelihood;
|
matthiasm@107
|
230 loglikelihood.identifier = "loglikelihood";
|
mail@126
|
231 loglikelihood.name = "Log-Likelihood of Chord Estimate";
|
mail@124
|
232 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
|
matthiasm@107
|
233 loglikelihood.unit = "";
|
matthiasm@107
|
234 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
235 loglikelihood.binCount = 1;
|
matthiasm@107
|
236 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
237 loglikelihood.isQuantized = false;
|
matthiasm@107
|
238 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
239 loglikelihood.hasDuration = false;
|
matthiasm@107
|
240 list.push_back(loglikelihood);
|
matthiasm@107
|
241 m_outputLoglikelihood = index++;
|
matthiasm@106
|
242
|
matthiasm@0
|
243 return list;
|
matthiasm@0
|
244 }
|
matthiasm@0
|
245
|
matthiasm@0
|
246 bool
|
Chris@35
|
247 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
248 {
|
Chris@23
|
249 if (debug_on) {
|
Chris@23
|
250 cerr << "--> initialise";
|
Chris@23
|
251 }
|
mail@76
|
252
|
Chris@35
|
253 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
254 return false;
|
Chris@35
|
255 }
|
mail@115
|
256 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
|
matthiasm@0
|
257 return true;
|
matthiasm@0
|
258 }
|
matthiasm@0
|
259
|
matthiasm@0
|
260 void
|
Chris@35
|
261 Chordino::reset()
|
matthiasm@0
|
262 {
|
Chris@23
|
263 if (debug_on) cerr << "--> reset";
|
Chris@35
|
264 NNLSBase::reset();
|
matthiasm@0
|
265 }
|
matthiasm@0
|
266
|
Chris@35
|
267 Chordino::FeatureSet
|
Chris@35
|
268 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
269 {
|
Chris@23
|
270 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
271
|
Chris@35
|
272 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
273
|
Chris@35
|
274 return FeatureSet();
|
matthiasm@0
|
275 }
|
matthiasm@0
|
276
|
Chris@35
|
277 Chordino::FeatureSet
|
Chris@35
|
278 Chordino::getRemainingFeatures()
|
matthiasm@0
|
279 {
|
mail@89
|
280 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
281 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
282 FeatureSet fsOut;
|
Chris@35
|
283 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
284 int nChord = m_chordnames.size();
|
Chris@23
|
285 //
|
Chris@23
|
286 /** Calculate Tuning
|
Chris@23
|
287 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
288 cumulative mean real and imag values)
|
Chris@23
|
289 **/
|
mail@80
|
290 float meanTuningImag = 0;
|
mail@80
|
291 float meanTuningReal = 0;
|
mail@80
|
292 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
293 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
294 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
295 }
|
Chris@23
|
296 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
297 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
298 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
299 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
300
|
Chris@23
|
301 char buffer0 [50];
|
matthiasm@1
|
302
|
Chris@23
|
303 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
304
|
matthiasm@1
|
305
|
Chris@23
|
306 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
307 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
308 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
309 **/
|
Chris@35
|
310 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
311
|
Chris@23
|
312 int count = 0;
|
matthiasm@1
|
313
|
Chris@35
|
314 FeatureList tunedSpec;
|
matthiasm@43
|
315 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
316
|
matthiasm@43
|
317 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
318
|
Chris@35
|
319 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
320 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
321 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
322 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
323 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
324 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
325 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
326
|
Chris@23
|
327 if (m_tuneLocal) {
|
Chris@23
|
328 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
329 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
330 }
|
matthiasm@1
|
331
|
mail@80
|
332 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
333
|
Chris@91
|
334 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@115
|
335 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
336 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
337 }
|
matthiasm@1
|
338
|
matthiasm@43
|
339 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
340 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
341 vector<float> runningstd;
|
mail@77
|
342 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
343 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
344 }
|
Chris@23
|
345 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
346 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
347 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
348 if (runningstd[i] > 0) {
|
matthiasm@43
|
349 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
350 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
351 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
352 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
353 }
|
matthiasm@43
|
354 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
355 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
356 }
|
Chris@23
|
357 }
|
matthiasm@43
|
358 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
359 count++;
|
Chris@23
|
360 }
|
Chris@23
|
361 cerr << "done." << endl;
|
matthiasm@1
|
362
|
Chris@23
|
363 /** Semitone spectrum and chromagrams
|
Chris@23
|
364 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
365 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
366 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
367 bass and treble stacked onto each other).
|
Chris@23
|
368 **/
|
matthiasm@42
|
369 if (m_useNNLS == 0) {
|
Chris@35
|
370 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
371 } else {
|
Chris@35
|
372 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
373 }
|
matthiasm@13
|
374
|
matthiasm@1
|
375
|
matthiasm@43
|
376 vector<vector<double> > chordogram;
|
Chris@23
|
377 vector<vector<int> > scoreChordogram;
|
Chris@35
|
378 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
379 count = 0;
|
matthiasm@9
|
380
|
Chris@35
|
381 FeatureList chromaList;
|
matthiasm@43
|
382
|
matthiasm@43
|
383
|
Chris@35
|
384
|
Chris@35
|
385 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
386 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
387 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
388
|
matthiasm@43
|
389 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
390 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
391
|
mail@77
|
392 float b[nNote];
|
matthiasm@1
|
393
|
Chris@23
|
394 bool some_b_greater_zero = false;
|
Chris@23
|
395 float sumb = 0;
|
mail@77
|
396 for (int i = 0; i < nNote; i++) {
|
mail@77
|
397 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
398 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
399 sumb += b[i];
|
Chris@23
|
400 if (b[i] > 0) {
|
Chris@23
|
401 some_b_greater_zero = true;
|
Chris@23
|
402 }
|
Chris@23
|
403 }
|
matthiasm@1
|
404
|
Chris@23
|
405 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
406
|
Chris@23
|
407 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
408 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
409 float currval;
|
Chris@91
|
410 int iSemitone = 0;
|
matthiasm@1
|
411
|
Chris@23
|
412 if (some_b_greater_zero) {
|
matthiasm@42
|
413 if (m_useNNLS == 0) {
|
Chris@91
|
414 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
415 currval = 0;
|
mail@81
|
416 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
417 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
418 }
|
Chris@23
|
419 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
420 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
421 iSemitone++;
|
Chris@23
|
422 }
|
matthiasm@1
|
423
|
Chris@23
|
424 } else {
|
Chris@35
|
425 float x[84+1000];
|
Chris@23
|
426 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
427 vector<int> signifIndex;
|
Chris@23
|
428 int index=0;
|
Chris@23
|
429 sumb /= 84.0;
|
Chris@91
|
430 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
431 float currval = 0;
|
mail@81
|
432 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
433 currval += b[iNote + iBPS];
|
mail@81
|
434 }
|
Chris@23
|
435 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
436 index++;
|
Chris@23
|
437 }
|
Chris@35
|
438 float rnorm;
|
Chris@35
|
439 float w[84+1000];
|
Chris@35
|
440 float zz[84+1000];
|
Chris@23
|
441 int indx[84+1000];
|
Chris@23
|
442 int mode;
|
mail@77
|
443 int dictsize = nNote*signifIndex.size();
|
mail@81
|
444 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
445 float *curr_dict = new float[dictsize];
|
Chris@91
|
446 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
447 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
448 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
449 }
|
Chris@23
|
450 }
|
Chris@35
|
451 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
452 delete [] curr_dict;
|
Chris@91
|
453 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
454 // cerr << mode << endl;
|
Chris@23
|
455 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
456 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
457 }
|
Chris@23
|
458 }
|
Chris@23
|
459 }
|
Chris@35
|
460
|
Chris@35
|
461 vector<float> origchroma = chroma;
|
Chris@23
|
462 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
463 currentChromas.values = chroma;
|
Chris@35
|
464
|
Chris@23
|
465 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
466 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
467 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
468 case 0: // should never end up here
|
Chris@23
|
469 break;
|
Chris@23
|
470 case 1:
|
Chris@35
|
471 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
472 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
473 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
474 break;
|
Chris@23
|
475 case 2:
|
Chris@35
|
476 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
477 chromanorm[2] += *it;
|
Chris@23
|
478 }
|
Chris@23
|
479 break;
|
Chris@23
|
480 case 3:
|
Chris@35
|
481 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
482 chromanorm[2] += pow(*it,2);
|
Chris@23
|
483 }
|
Chris@23
|
484 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
485 break;
|
Chris@23
|
486 }
|
Chris@23
|
487 if (chromanorm[2] > 0) {
|
Chris@91
|
488 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
489 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
490 }
|
Chris@23
|
491 }
|
Chris@23
|
492 }
|
Chris@35
|
493
|
matthiasm@43
|
494 chromaList.push_back(currentChromas);
|
Chris@35
|
495
|
Chris@23
|
496 // local chord estimation
|
matthiasm@43
|
497 vector<double> currentChordSalience;
|
matthiasm@43
|
498 double tempchordvalue = 0;
|
matthiasm@43
|
499 double sumchordvalue = 0;
|
matthiasm@9
|
500
|
Chris@23
|
501 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
502 tempchordvalue = 0;
|
Chris@23
|
503 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
504 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
505 }
|
Chris@23
|
506 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
507 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
508 }
|
matthiasm@48
|
509 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
510 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
511 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
512 sumchordvalue+=tempchordvalue;
|
Chris@23
|
513 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
514 }
|
Chris@23
|
515 if (sumchordvalue > 0) {
|
Chris@23
|
516 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
517 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
518 }
|
Chris@23
|
519 } else {
|
Chris@23
|
520 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
521 }
|
Chris@23
|
522 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
523
|
Chris@23
|
524 count++;
|
Chris@23
|
525 }
|
Chris@23
|
526 cerr << "done." << endl;
|
matthiasm@13
|
527
|
matthiasm@86
|
528 vector<Feature> oldnotes;
|
matthiasm@10
|
529
|
matthiasm@50
|
530 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
531 if (m_useHMM == 1.0) {
|
matthiasm@44
|
532 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
533 int oldchord = nChord-1;
|
matthiasm@48
|
534 double selftransprob = 0.99;
|
matthiasm@43
|
535
|
matthiasm@48
|
536 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
537 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
538
|
matthiasm@50
|
539 double *delta;
|
matthiasm@50
|
540 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
541
|
matthiasm@43
|
542 vector<vector<double> > trans;
|
matthiasm@43
|
543 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
544 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
545 temp[iChord] = selftransprob;
|
matthiasm@43
|
546 trans.push_back(temp);
|
matthiasm@43
|
547 }
|
matthiasm@106
|
548 vector<double> scale;
|
matthiasm@106
|
549 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@106
|
550
|
matthiasm@48
|
551
|
matthiasm@48
|
552 Feature chord_feature; // chord estimate
|
matthiasm@48
|
553 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
554 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
555 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
556 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
557
|
mail@60
|
558 chordchange[0] = 0;
|
Chris@91
|
559 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
560 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
561 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
562 // chord
|
matthiasm@43
|
563 Feature chord_feature; // chord estimate
|
matthiasm@43
|
564 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
565 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
566 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
567 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
568 oldchord = chordpath[iFrame];
|
matthiasm@86
|
569 // chord notes
|
Chris@91
|
570 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
571 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
572 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
573 }
|
matthiasm@86
|
574 oldnotes.clear();
|
Chris@91
|
575 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
576 Feature chordnote_feature;
|
matthiasm@86
|
577 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
578 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
579 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
580 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
581 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
582 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
583 }
|
Chris@23
|
584 }
|
matthiasm@50
|
585 /* calculating simple chord change prob */
|
matthiasm@50
|
586 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
587 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
588 }
|
Chris@23
|
589 }
|
matthiasm@43
|
590
|
matthiasm@106
|
591 float logscale = 0;
|
matthiasm@106
|
592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@106
|
593 logscale -= log(scale[iFrame]);
|
matthiasm@106
|
594 Feature loglikelihood;
|
matthiasm@106
|
595 loglikelihood.hasTimestamp = true;
|
matthiasm@106
|
596 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@106
|
597 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@106
|
598 // cerr << chordchange[iFrame] << endl;
|
matthiasm@107
|
599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@106
|
600 }
|
matthiasm@106
|
601 logscale /= nFrame;
|
mail@111
|
602 // cerr << "loglik" << logscale << endl;
|
matthiasm@106
|
603
|
matthiasm@106
|
604
|
matthiasm@43
|
605 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
606 } else {
|
matthiasm@43
|
607 /* Simple chord estimation
|
matthiasm@43
|
608 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
609 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
610 */
|
matthiasm@44
|
611 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
612 count = 0;
|
matthiasm@43
|
613 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
614 vector<int> chordSequence;
|
matthiasm@43
|
615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
616 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
617 scoreChordogram.push_back(temp);
|
matthiasm@43
|
618 }
|
matthiasm@43
|
619 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
620 int startIndex = count + 1;
|
matthiasm@43
|
621 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
622
|
matthiasm@43
|
623 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
624
|
matthiasm@43
|
625 vector<int> chordCandidates;
|
Chris@91
|
626 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
627 // float currsum = 0;
|
Chris@91
|
628 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
629 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
630 // }
|
matthiasm@43
|
631 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
632 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
633 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
634 chordCandidates.push_back(iChord);
|
matthiasm@43
|
635 break;
|
matthiasm@43
|
636 }
|
Chris@23
|
637 }
|
Chris@23
|
638 }
|
matthiasm@43
|
639 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
640 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
641
|
matthiasm@43
|
642 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
643 float maxindex = 0; //... and the index thereof
|
Chris@91
|
644 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
645 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
646
|
matthiasm@43
|
647 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
648 // now find the max values on both sides of iWF
|
matthiasm@43
|
649 // left side:
|
matthiasm@43
|
650 float maxL = 0;
|
Chris@91
|
651 int maxindL = nChord-1;
|
Chris@91
|
652 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
653 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
654 float currsum = 0;
|
Chris@91
|
655 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
656 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
657 }
|
matthiasm@43
|
658 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
659 if (currsum > maxL) {
|
matthiasm@43
|
660 maxL = currsum;
|
matthiasm@43
|
661 maxindL = iChord;
|
matthiasm@43
|
662 }
|
matthiasm@43
|
663 }
|
matthiasm@43
|
664 // right side:
|
matthiasm@43
|
665 float maxR = 0;
|
Chris@91
|
666 int maxindR = nChord-1;
|
Chris@91
|
667 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
668 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
669 float currsum = 0;
|
Chris@91
|
670 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
671 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
672 }
|
matthiasm@43
|
673 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
674 if (currsum > maxR) {
|
matthiasm@43
|
675 maxR = currsum;
|
matthiasm@43
|
676 maxindR = iChord;
|
matthiasm@43
|
677 }
|
matthiasm@43
|
678 }
|
matthiasm@43
|
679 if (maxL+maxR > maxval) {
|
matthiasm@43
|
680 maxval = maxL+maxR;
|
matthiasm@43
|
681 maxindex = iWF;
|
matthiasm@43
|
682 bestchordL = maxindL;
|
matthiasm@43
|
683 bestchordR = maxindR;
|
matthiasm@43
|
684 }
|
matthiasm@43
|
685
|
Chris@23
|
686 }
|
matthiasm@43
|
687 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
688 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
689 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
690 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
691 }
|
Chris@91
|
692 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
693 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
694 }
|
matthiasm@50
|
695 if (bestchordL != bestchordR) {
|
matthiasm@50
|
696 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
697 }
|
matthiasm@43
|
698 count++;
|
Chris@23
|
699 }
|
matthiasm@43
|
700 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
701 count = 0;
|
matthiasm@43
|
702 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
703 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
704 float maxindex = 0; //... and the index thereof
|
Chris@91
|
705 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
706 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
707 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
708 maxindex = iChord;
|
matthiasm@43
|
709 // cerr << iChord << endl;
|
matthiasm@43
|
710 }
|
matthiasm@43
|
711 }
|
matthiasm@43
|
712 chordSequence.push_back(maxindex);
|
matthiasm@43
|
713 count++;
|
Chris@23
|
714 }
|
matthiasm@43
|
715
|
matthiasm@43
|
716
|
matthiasm@43
|
717 // mode filter on chordSequence
|
matthiasm@43
|
718 count = 0;
|
matthiasm@43
|
719 string oldChord = "";
|
matthiasm@43
|
720 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
721 Feature chord_feature; // chord estimate
|
matthiasm@43
|
722 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
723 chord_feature.timestamp = *it;
|
matthiasm@43
|
724 // Feature currentChord; // chord estimate
|
matthiasm@43
|
725 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
726 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
727
|
matthiasm@43
|
728 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
729 int maxChordCount = 0;
|
matthiasm@43
|
730 int maxChordIndex = nChord-1;
|
matthiasm@43
|
731 string maxChord;
|
matthiasm@43
|
732 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
733 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
734 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
735 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
736 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
737 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
738 maxChordCount++;
|
matthiasm@43
|
739 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
740 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
741 }
|
matthiasm@43
|
742 }
|
matthiasm@43
|
743 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
744 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
745 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
746 if (oldChord != maxChord) {
|
matthiasm@43
|
747 oldChord = maxChord;
|
matthiasm@43
|
748 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
749 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
750 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
751 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
752 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
753 }
|
matthiasm@86
|
754 oldnotes.clear();
|
Chris@91
|
755 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
756 Feature chordnote_feature;
|
matthiasm@86
|
757 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
758 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
759 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
760 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
761 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
762 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
763 }
|
matthiasm@43
|
764 }
|
matthiasm@43
|
765 count++;
|
Chris@23
|
766 }
|
Chris@23
|
767 }
|
matthiasm@43
|
768 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
769 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
770 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
771 chord_feature.label = "N";
|
mail@60
|
772 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
773
|
Chris@91
|
774 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
775 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
776 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
777 }
|
matthiasm@86
|
778
|
Chris@23
|
779 cerr << "done." << endl;
|
matthiasm@50
|
780
|
matthiasm@50
|
781 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
782 Feature chordchange_feature;
|
matthiasm@50
|
783 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
784 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
785 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
786 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
787 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
788 }
|
matthiasm@50
|
789
|
mail@60
|
790 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
791
|
matthiasm@50
|
792
|
Chris@23
|
793 return fsOut;
|
matthiasm@0
|
794 }
|