Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// File-wide switch for the "--> method" trace lines written to cerr.
const bool debug_on(false);
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
39 }
|
matthiasm@0
|
40
|
Chris@35
|
41 Chordino::~Chordino()
|
matthiasm@0
|
42 {
|
Chris@35
|
43 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
44 }
|
matthiasm@0
|
45
|
matthiasm@0
|
46 string
|
Chris@35
|
47 Chordino::getIdentifier() const
|
matthiasm@0
|
48 {
|
Chris@23
|
49 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
50 return "chordino";
|
matthiasm@0
|
51 }
|
matthiasm@0
|
52
|
matthiasm@0
|
53 string
|
Chris@35
|
54 Chordino::getName() const
|
matthiasm@0
|
55 {
|
Chris@23
|
56 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
57 return "Chordino";
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
Chris@35
|
61 Chordino::getDescription() const
|
matthiasm@0
|
62 {
|
Chris@23
|
63 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
65 }
|
matthiasm@0
|
66
|
matthiasm@50
|
67 Chordino::ParameterList
|
matthiasm@50
|
68 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
69 {
|
matthiasm@50
|
70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
71 ParameterList list;
|
matthiasm@50
|
72
|
mail@118
|
73 ParameterDescriptor useNNLSParam;
|
mail@118
|
74 useNNLSParam.identifier = "useNNLS";
|
mail@118
|
75 useNNLSParam.name = "use approximate transcription (NNLS)";
|
mail@118
|
76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
|
mail@118
|
77 useNNLSParam.unit = "";
|
mail@118
|
78 useNNLSParam.minValue = 0.0;
|
mail@118
|
79 useNNLSParam.maxValue = 1.0;
|
mail@118
|
80 useNNLSParam.defaultValue = 1.0;
|
mail@118
|
81 useNNLSParam.isQuantized = true;
|
mail@118
|
82 useNNLSParam.quantizeStep = 1.0;
|
mail@118
|
83 list.push_back(useNNLSParam);
|
matthiasm@50
|
84
|
mail@118
|
85 ParameterDescriptor useHMMParam;
|
mail@118
|
86 useHMMParam.identifier = "useHMM";
|
mail@118
|
87 useHMMParam.name = "HMM (Viterbi decoding)";
|
mail@118
|
88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
mail@118
|
89 useHMMParam.unit = "";
|
mail@118
|
90 useHMMParam.minValue = 0.0;
|
mail@118
|
91 useHMMParam.maxValue = 1.0;
|
mail@118
|
92 useHMMParam.defaultValue = 1.0;
|
mail@118
|
93 useHMMParam.isQuantized = true;
|
mail@118
|
94 useHMMParam.quantizeStep = 1.0;
|
mail@118
|
95 list.push_back(useHMMParam);
|
matthiasm@50
|
96
|
mail@118
|
97 ParameterDescriptor rollonParam;
|
mail@118
|
98 rollonParam.identifier = "rollon";
|
mail@118
|
99 rollonParam.name = "bass noise threshold";
|
mail@118
|
100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
mail@118
|
101 rollonParam.unit = "%";
|
mail@118
|
102 rollonParam.minValue = 0;
|
mail@118
|
103 rollonParam.maxValue = 5;
|
mail@118
|
104 rollonParam.defaultValue = 0.0;
|
mail@118
|
105 rollonParam.isQuantized = true;
|
mail@118
|
106 rollonParam.quantizeStep = 0.5;
|
mail@118
|
107 list.push_back(rollonParam);
|
matthiasm@50
|
108
|
mail@118
|
109 ParameterDescriptor tuningmodeParam;
|
mail@118
|
110 tuningmodeParam.identifier = "tuningmode";
|
mail@118
|
111 tuningmodeParam.name = "tuning mode";
|
mail@118
|
112 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
mail@118
|
113 tuningmodeParam.unit = "";
|
mail@118
|
114 tuningmodeParam.minValue = 0;
|
mail@118
|
115 tuningmodeParam.maxValue = 1;
|
mail@118
|
116 tuningmodeParam.defaultValue = 0.0;
|
mail@118
|
117 tuningmodeParam.isQuantized = true;
|
mail@118
|
118 tuningmodeParam.valueNames.push_back("global tuning");
|
mail@118
|
119 tuningmodeParam.valueNames.push_back("local tuning");
|
mail@118
|
120 tuningmodeParam.quantizeStep = 1.0;
|
mail@118
|
121 list.push_back(tuningmodeParam);
|
matthiasm@50
|
122
|
mail@118
|
123 ParameterDescriptor whiteningParam;
|
mail@118
|
124 whiteningParam.identifier = "whitening";
|
mail@118
|
125 whiteningParam.name = "spectral whitening";
|
mail@118
|
126 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@118
|
127 whiteningParam.unit = "";
|
mail@118
|
128 whiteningParam.isQuantized = true;
|
mail@118
|
129 whiteningParam.minValue = 0.0;
|
mail@118
|
130 whiteningParam.maxValue = 1.0;
|
mail@118
|
131 whiteningParam.defaultValue = 1.0;
|
mail@118
|
132 whiteningParam.isQuantized = false;
|
mail@118
|
133 list.push_back(whiteningParam);
|
matthiasm@50
|
134
|
mail@118
|
135 ParameterDescriptor spectralShapeParam;
|
mail@118
|
136 spectralShapeParam.identifier = "spectralshape";
|
mail@118
|
137 spectralShapeParam.name = "spectral shape";
|
mail@118
|
138 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@118
|
139 spectralShapeParam.unit = "";
|
mail@118
|
140 spectralShapeParam.minValue = 0.5;
|
mail@118
|
141 spectralShapeParam.maxValue = 0.9;
|
mail@118
|
142 spectralShapeParam.defaultValue = 0.7;
|
mail@118
|
143 spectralShapeParam.isQuantized = false;
|
mail@118
|
144 list.push_back(spectralShapeParam);
|
matthiasm@50
|
145
|
mail@118
|
146 ParameterDescriptor boostnParam;
|
mail@118
|
147 boostnParam.identifier = "boostn";
|
mail@118
|
148 boostnParam.name = "boost N";
|
mail@118
|
149 boostnParam.description = "Boost likelihood of the N (no chord) label.";
|
mail@118
|
150 boostnParam.unit = "";
|
mail@118
|
151 boostnParam.minValue = 0.0;
|
mail@118
|
152 boostnParam.maxValue = 1.0;
|
mail@118
|
153 boostnParam.defaultValue = 0.1;
|
mail@118
|
154 boostnParam.isQuantized = false;
|
mail@118
|
155 list.push_back(boostnParam);
|
matthiasm@50
|
156
|
mail@118
|
157 ParameterDescriptor usehartesyntaxParam;
|
mail@118
|
158 usehartesyntaxParam.identifier = "usehartesyntax";
|
mail@118
|
159 usehartesyntaxParam.name = "use Harte syntax";
|
mail@118
|
160 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
|
mail@118
|
161 usehartesyntaxParam.unit = "";
|
mail@118
|
162 usehartesyntaxParam.minValue = 0.0;
|
mail@118
|
163 usehartesyntaxParam.maxValue = 1.0;
|
mail@118
|
164 usehartesyntaxParam.defaultValue = 0.0;
|
mail@118
|
165 usehartesyntaxParam.isQuantized = true;
|
mail@118
|
166 usehartesyntaxParam.quantizeStep = 1.0;
|
mail@118
|
167 usehartesyntaxParam.valueNames.push_back("no");
|
mail@118
|
168 usehartesyntaxParam.valueNames.push_back("yes");
|
mail@118
|
169 list.push_back(usehartesyntaxParam);
|
mail@112
|
170
|
matthiasm@50
|
171 return list;
|
matthiasm@50
|
172 }
|
matthiasm@50
|
173
|
Chris@35
|
174 Chordino::OutputList
|
Chris@35
|
175 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
176 {
|
Chris@23
|
177 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
178 OutputList list;
|
matthiasm@0
|
179
|
Chris@35
|
180 int index = 0;
|
matthiasm@0
|
181
|
matthiasm@0
|
182 OutputDescriptor d7;
|
matthiasm@0
|
183 d7.identifier = "simplechord";
|
Chris@36
|
184 d7.name = "Chord Estimate";
|
matthiasm@58
|
185 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
186 d7.unit = "";
|
matthiasm@0
|
187 d7.hasFixedBinCount = true;
|
matthiasm@0
|
188 d7.binCount = 0;
|
matthiasm@0
|
189 d7.hasKnownExtents = false;
|
matthiasm@0
|
190 d7.isQuantized = false;
|
matthiasm@0
|
191 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
192 d7.hasDuration = false;
|
matthiasm@0
|
193 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
194 list.push_back(d7);
|
Chris@35
|
195 m_outputChords = index++;
|
matthiasm@0
|
196
|
matthiasm@86
|
197 OutputDescriptor chordnotes;
|
matthiasm@86
|
198 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
199 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
200 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
201 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
202 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
203 chordnotes.binCount = 1;
|
matthiasm@86
|
204 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
205 chordnotes.minValue = 0;
|
matthiasm@86
|
206 chordnotes.maxValue = 127;
|
matthiasm@86
|
207 chordnotes.isQuantized = true;
|
matthiasm@86
|
208 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
209 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
210 chordnotes.hasDuration = true;
|
matthiasm@86
|
211 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
212 list.push_back(chordnotes);
|
matthiasm@86
|
213 m_outputChordnotes = index++;
|
matthiasm@86
|
214
|
Chris@23
|
215 OutputDescriptor d8;
|
mail@60
|
216 d8.identifier = "harmonicchange";
|
Chris@36
|
217 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
218 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
219 d8.unit = "";
|
matthiasm@17
|
220 d8.hasFixedBinCount = true;
|
matthiasm@17
|
221 d8.binCount = 1;
|
mail@60
|
222 d8.hasKnownExtents = false;
|
mail@60
|
223 // d8.minValue = 0.0;
|
mail@60
|
224 // d8.maxValue = 0.999;
|
matthiasm@17
|
225 d8.isQuantized = false;
|
matthiasm@17
|
226 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
227 d8.hasDuration = false;
|
matthiasm@17
|
228 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
229 list.push_back(d8);
|
Chris@35
|
230 m_outputHarmonicChange = index++;
|
matthiasm@1
|
231
|
matthiasm@107
|
232 OutputDescriptor loglikelihood;
|
matthiasm@107
|
233 loglikelihood.identifier = "loglikelihood";
|
matthiasm@107
|
234 loglikelihood.name = "chord estimate log-likelihood";
|
matthiasm@107
|
235 loglikelihood.description = ".";
|
matthiasm@107
|
236 loglikelihood.unit = "";
|
matthiasm@107
|
237 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
238 loglikelihood.binCount = 1;
|
matthiasm@107
|
239 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
240 loglikelihood.isQuantized = false;
|
matthiasm@107
|
241 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
242 loglikelihood.hasDuration = false;
|
matthiasm@107
|
243 // loglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@107
|
244 list.push_back(loglikelihood);
|
matthiasm@107
|
245 m_outputLoglikelihood = index++;
|
matthiasm@106
|
246
|
matthiasm@0
|
247 return list;
|
matthiasm@0
|
248 }
|
matthiasm@0
|
249
|
matthiasm@0
|
250 bool
|
Chris@35
|
251 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
252 {
|
Chris@23
|
253 if (debug_on) {
|
Chris@23
|
254 cerr << "--> initialise";
|
Chris@23
|
255 }
|
mail@76
|
256
|
Chris@35
|
257 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
258 return false;
|
Chris@35
|
259 }
|
mail@115
|
260 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
|
matthiasm@0
|
261 return true;
|
matthiasm@0
|
262 }
|
matthiasm@0
|
263
|
matthiasm@0
|
264 void
|
Chris@35
|
265 Chordino::reset()
|
matthiasm@0
|
266 {
|
Chris@23
|
267 if (debug_on) cerr << "--> reset";
|
Chris@35
|
268 NNLSBase::reset();
|
matthiasm@0
|
269 }
|
matthiasm@0
|
270
|
Chris@35
|
271 Chordino::FeatureSet
|
Chris@35
|
272 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
273 {
|
Chris@23
|
274 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
275
|
Chris@35
|
276 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
277
|
Chris@35
|
278 return FeatureSet();
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
Chris@35
|
281 Chordino::FeatureSet
|
Chris@35
|
282 Chordino::getRemainingFeatures()
|
matthiasm@0
|
283 {
|
mail@89
|
284 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
285 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
286 FeatureSet fsOut;
|
Chris@35
|
287 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
288 int nChord = m_chordnames.size();
|
Chris@23
|
289 //
|
Chris@23
|
290 /** Calculate Tuning
|
Chris@23
|
291 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
292 cumulative mean real and imag values)
|
Chris@23
|
293 **/
|
mail@80
|
294 float meanTuningImag = 0;
|
mail@80
|
295 float meanTuningReal = 0;
|
mail@80
|
296 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
297 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
298 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
299 }
|
Chris@23
|
300 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
301 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
302 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
303 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
304
|
Chris@23
|
305 char buffer0 [50];
|
matthiasm@1
|
306
|
Chris@23
|
307 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
308
|
matthiasm@1
|
309
|
Chris@23
|
310 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
311 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
312 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
313 **/
|
Chris@35
|
314 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
315
|
Chris@23
|
316 int count = 0;
|
matthiasm@1
|
317
|
Chris@35
|
318 FeatureList tunedSpec;
|
matthiasm@43
|
319 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
320
|
matthiasm@43
|
321 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
322
|
Chris@35
|
323 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
324 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
325 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
326 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
327 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
328 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
329 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
330
|
Chris@23
|
331 if (m_tuneLocal) {
|
Chris@23
|
332 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
333 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
334 }
|
matthiasm@1
|
335
|
mail@80
|
336 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
337
|
Chris@91
|
338 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@115
|
339 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
340 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
341 }
|
matthiasm@1
|
342
|
matthiasm@43
|
343 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
344 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
345 vector<float> runningstd;
|
mail@77
|
346 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
347 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
348 }
|
Chris@23
|
349 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
350 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
351 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
352 if (runningstd[i] > 0) {
|
matthiasm@43
|
353 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
354 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
355 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
356 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
357 }
|
matthiasm@43
|
358 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
359 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
360 }
|
Chris@23
|
361 }
|
matthiasm@43
|
362 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
363 count++;
|
Chris@23
|
364 }
|
Chris@23
|
365 cerr << "done." << endl;
|
matthiasm@1
|
366
|
Chris@23
|
367 /** Semitone spectrum and chromagrams
|
Chris@23
|
368 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
369 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
370 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
371 bass and treble stacked onto each other).
|
Chris@23
|
372 **/
|
matthiasm@42
|
373 if (m_useNNLS == 0) {
|
Chris@35
|
374 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
375 } else {
|
Chris@35
|
376 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
377 }
|
matthiasm@13
|
378
|
matthiasm@1
|
379
|
matthiasm@43
|
380 vector<vector<double> > chordogram;
|
Chris@23
|
381 vector<vector<int> > scoreChordogram;
|
Chris@35
|
382 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
383 count = 0;
|
matthiasm@9
|
384
|
Chris@35
|
385 FeatureList chromaList;
|
matthiasm@43
|
386
|
matthiasm@43
|
387
|
Chris@35
|
388
|
Chris@35
|
389 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
390 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
391 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
392
|
matthiasm@43
|
393 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
394 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
395
|
mail@77
|
396 float b[nNote];
|
matthiasm@1
|
397
|
Chris@23
|
398 bool some_b_greater_zero = false;
|
Chris@23
|
399 float sumb = 0;
|
mail@77
|
400 for (int i = 0; i < nNote; i++) {
|
mail@77
|
401 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
402 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
403 sumb += b[i];
|
Chris@23
|
404 if (b[i] > 0) {
|
Chris@23
|
405 some_b_greater_zero = true;
|
Chris@23
|
406 }
|
Chris@23
|
407 }
|
matthiasm@1
|
408
|
Chris@23
|
409 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
410
|
Chris@23
|
411 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
412 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
413 float currval;
|
Chris@91
|
414 int iSemitone = 0;
|
matthiasm@1
|
415
|
Chris@23
|
416 if (some_b_greater_zero) {
|
matthiasm@42
|
417 if (m_useNNLS == 0) {
|
Chris@91
|
418 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
419 currval = 0;
|
mail@81
|
420 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
421 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
422 }
|
Chris@23
|
423 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
424 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
425 iSemitone++;
|
Chris@23
|
426 }
|
matthiasm@1
|
427
|
Chris@23
|
428 } else {
|
Chris@35
|
429 float x[84+1000];
|
Chris@23
|
430 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
431 vector<int> signifIndex;
|
Chris@23
|
432 int index=0;
|
Chris@23
|
433 sumb /= 84.0;
|
Chris@91
|
434 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
435 float currval = 0;
|
mail@81
|
436 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
437 currval += b[iNote + iBPS];
|
mail@81
|
438 }
|
Chris@23
|
439 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
440 index++;
|
Chris@23
|
441 }
|
Chris@35
|
442 float rnorm;
|
Chris@35
|
443 float w[84+1000];
|
Chris@35
|
444 float zz[84+1000];
|
Chris@23
|
445 int indx[84+1000];
|
Chris@23
|
446 int mode;
|
mail@77
|
447 int dictsize = nNote*signifIndex.size();
|
mail@81
|
448 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
449 float *curr_dict = new float[dictsize];
|
Chris@91
|
450 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
451 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
452 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
453 }
|
Chris@23
|
454 }
|
Chris@35
|
455 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
456 delete [] curr_dict;
|
Chris@91
|
457 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
458 // cerr << mode << endl;
|
Chris@23
|
459 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
460 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
461 }
|
Chris@23
|
462 }
|
Chris@23
|
463 }
|
Chris@35
|
464
|
Chris@35
|
465 vector<float> origchroma = chroma;
|
Chris@23
|
466 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
467 currentChromas.values = chroma;
|
Chris@35
|
468
|
Chris@23
|
469 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
470 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
471 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
472 case 0: // should never end up here
|
Chris@23
|
473 break;
|
Chris@23
|
474 case 1:
|
Chris@35
|
475 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
476 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
477 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
478 break;
|
Chris@23
|
479 case 2:
|
Chris@35
|
480 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
481 chromanorm[2] += *it;
|
Chris@23
|
482 }
|
Chris@23
|
483 break;
|
Chris@23
|
484 case 3:
|
Chris@35
|
485 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
486 chromanorm[2] += pow(*it,2);
|
Chris@23
|
487 }
|
Chris@23
|
488 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
489 break;
|
Chris@23
|
490 }
|
Chris@23
|
491 if (chromanorm[2] > 0) {
|
Chris@91
|
492 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
493 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
494 }
|
Chris@23
|
495 }
|
Chris@23
|
496 }
|
Chris@35
|
497
|
mail@125
|
498 if (*max_element(origchroma.begin(), origchroma.end()) == 0) {
|
mail@125
|
499 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@122
|
500 chroma[i] = 1;
|
matthiasm@122
|
501 }
|
mail@125
|
502 }
|
mail@113
|
503
|
matthiasm@43
|
504 chromaList.push_back(currentChromas);
|
Chris@35
|
505
|
Chris@23
|
506 // local chord estimation
|
matthiasm@43
|
507 vector<double> currentChordSalience;
|
matthiasm@43
|
508 double tempchordvalue = 0;
|
matthiasm@43
|
509 double sumchordvalue = 0;
|
matthiasm@9
|
510
|
Chris@23
|
511 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
512 tempchordvalue = 0;
|
Chris@23
|
513 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
514 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
515 }
|
Chris@23
|
516 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
517 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
518 }
|
matthiasm@48
|
519 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
520 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
521 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
522 sumchordvalue+=tempchordvalue;
|
Chris@23
|
523 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
524 }
|
Chris@23
|
525 if (sumchordvalue > 0) {
|
Chris@23
|
526 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
527 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
528 }
|
Chris@23
|
529 } else {
|
Chris@23
|
530 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
531 }
|
Chris@23
|
532 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
533
|
Chris@23
|
534 count++;
|
Chris@23
|
535 }
|
Chris@23
|
536 cerr << "done." << endl;
|
matthiasm@13
|
537
|
matthiasm@86
|
538 vector<Feature> oldnotes;
|
matthiasm@10
|
539
|
matthiasm@50
|
540 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
541 if (m_useHMM == 1.0) {
|
matthiasm@44
|
542 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
543 int oldchord = nChord-1;
|
matthiasm@48
|
544 double selftransprob = 0.99;
|
matthiasm@43
|
545
|
matthiasm@48
|
546 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
547 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
548
|
matthiasm@50
|
549 double *delta;
|
matthiasm@50
|
550 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
551
|
matthiasm@43
|
552 vector<vector<double> > trans;
|
matthiasm@43
|
553 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
554 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
555 temp[iChord] = selftransprob;
|
matthiasm@43
|
556 trans.push_back(temp);
|
matthiasm@43
|
557 }
|
matthiasm@106
|
558 vector<double> scale;
|
matthiasm@106
|
559 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@106
|
560
|
matthiasm@48
|
561
|
matthiasm@48
|
562 Feature chord_feature; // chord estimate
|
matthiasm@48
|
563 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
564 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
565 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
566 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
567
|
mail@60
|
568 chordchange[0] = 0;
|
Chris@91
|
569 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
570 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
571 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
572 // chord
|
matthiasm@43
|
573 Feature chord_feature; // chord estimate
|
matthiasm@43
|
574 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
575 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
576 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
577 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
578 oldchord = chordpath[iFrame];
|
matthiasm@86
|
579 // chord notes
|
Chris@91
|
580 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
581 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
582 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
583 }
|
matthiasm@86
|
584 oldnotes.clear();
|
Chris@91
|
585 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
586 Feature chordnote_feature;
|
matthiasm@86
|
587 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
588 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
589 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
590 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
591 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
592 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
593 }
|
Chris@23
|
594 }
|
matthiasm@50
|
595 /* calculating simple chord change prob */
|
matthiasm@50
|
596 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
597 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
598 }
|
Chris@23
|
599 }
|
matthiasm@43
|
600
|
matthiasm@106
|
601 float logscale = 0;
|
matthiasm@106
|
602 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@106
|
603 logscale -= log(scale[iFrame]);
|
matthiasm@106
|
604 Feature loglikelihood;
|
matthiasm@106
|
605 loglikelihood.hasTimestamp = true;
|
matthiasm@106
|
606 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@106
|
607 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@106
|
608 // cerr << chordchange[iFrame] << endl;
|
matthiasm@107
|
609 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@106
|
610 }
|
matthiasm@106
|
611 logscale /= nFrame;
|
mail@111
|
612 // cerr << "loglik" << logscale << endl;
|
matthiasm@106
|
613
|
matthiasm@106
|
614
|
matthiasm@43
|
615 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
616 } else {
|
matthiasm@43
|
617 /* Simple chord estimation
|
matthiasm@43
|
618 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
619 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
620 */
|
matthiasm@44
|
621 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
622 count = 0;
|
matthiasm@43
|
623 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
624 vector<int> chordSequence;
|
matthiasm@43
|
625 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
626 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
627 scoreChordogram.push_back(temp);
|
matthiasm@43
|
628 }
|
matthiasm@43
|
629 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
630 int startIndex = count + 1;
|
matthiasm@43
|
631 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
632
|
matthiasm@43
|
633 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
634
|
matthiasm@43
|
635 vector<int> chordCandidates;
|
Chris@91
|
636 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
637 // float currsum = 0;
|
Chris@91
|
638 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
639 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
640 // }
|
matthiasm@43
|
641 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
642 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
643 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
644 chordCandidates.push_back(iChord);
|
matthiasm@43
|
645 break;
|
matthiasm@43
|
646 }
|
Chris@23
|
647 }
|
Chris@23
|
648 }
|
matthiasm@43
|
649 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
650 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
651
|
matthiasm@43
|
652 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
653 float maxindex = 0; //... and the index thereof
|
Chris@91
|
654 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
655 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
656
|
matthiasm@43
|
657 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
658 // now find the max values on both sides of iWF
|
matthiasm@43
|
659 // left side:
|
matthiasm@43
|
660 float maxL = 0;
|
Chris@91
|
661 int maxindL = nChord-1;
|
Chris@91
|
662 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
663 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
664 float currsum = 0;
|
Chris@91
|
665 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
666 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
667 }
|
matthiasm@43
|
668 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
669 if (currsum > maxL) {
|
matthiasm@43
|
670 maxL = currsum;
|
matthiasm@43
|
671 maxindL = iChord;
|
matthiasm@43
|
672 }
|
matthiasm@43
|
673 }
|
matthiasm@43
|
674 // right side:
|
matthiasm@43
|
675 float maxR = 0;
|
Chris@91
|
676 int maxindR = nChord-1;
|
Chris@91
|
677 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
678 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
679 float currsum = 0;
|
Chris@91
|
680 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
681 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
682 }
|
matthiasm@43
|
683 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
684 if (currsum > maxR) {
|
matthiasm@43
|
685 maxR = currsum;
|
matthiasm@43
|
686 maxindR = iChord;
|
matthiasm@43
|
687 }
|
matthiasm@43
|
688 }
|
matthiasm@43
|
689 if (maxL+maxR > maxval) {
|
matthiasm@43
|
690 maxval = maxL+maxR;
|
matthiasm@43
|
691 maxindex = iWF;
|
matthiasm@43
|
692 bestchordL = maxindL;
|
matthiasm@43
|
693 bestchordR = maxindR;
|
matthiasm@43
|
694 }
|
matthiasm@43
|
695
|
Chris@23
|
696 }
|
matthiasm@43
|
697 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
698 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
699 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
700 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
701 }
|
Chris@91
|
702 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
703 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
704 }
|
matthiasm@50
|
705 if (bestchordL != bestchordR) {
|
matthiasm@50
|
706 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
707 }
|
matthiasm@43
|
708 count++;
|
Chris@23
|
709 }
|
matthiasm@43
|
710 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
711 count = 0;
|
matthiasm@43
|
712 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
713 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
714 float maxindex = 0; //... and the index thereof
|
Chris@91
|
715 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
716 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
717 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
718 maxindex = iChord;
|
matthiasm@43
|
719 // cerr << iChord << endl;
|
matthiasm@43
|
720 }
|
matthiasm@43
|
721 }
|
matthiasm@43
|
722 chordSequence.push_back(maxindex);
|
matthiasm@43
|
723 count++;
|
Chris@23
|
724 }
|
matthiasm@43
|
725
|
matthiasm@43
|
726
|
matthiasm@43
|
727 // mode filter on chordSequence
|
matthiasm@43
|
728 count = 0;
|
matthiasm@43
|
729 string oldChord = "";
|
matthiasm@43
|
730 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
731 Feature chord_feature; // chord estimate
|
matthiasm@43
|
732 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
733 chord_feature.timestamp = *it;
|
matthiasm@43
|
734 // Feature currentChord; // chord estimate
|
matthiasm@43
|
735 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
736 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
737
|
matthiasm@43
|
738 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
739 int maxChordCount = 0;
|
matthiasm@43
|
740 int maxChordIndex = nChord-1;
|
matthiasm@43
|
741 string maxChord;
|
matthiasm@43
|
742 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
743 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
744 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
745 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
746 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
747 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
748 maxChordCount++;
|
matthiasm@43
|
749 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
750 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
751 }
|
matthiasm@43
|
752 }
|
matthiasm@43
|
753 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
754 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
755 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
756 if (oldChord != maxChord) {
|
matthiasm@43
|
757 oldChord = maxChord;
|
matthiasm@43
|
758 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
759 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
760 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
761 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
762 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
763 }
|
matthiasm@86
|
764 oldnotes.clear();
|
Chris@91
|
765 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
766 Feature chordnote_feature;
|
matthiasm@86
|
767 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
768 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
769 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
770 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
771 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
772 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
773 }
|
matthiasm@43
|
774 }
|
matthiasm@43
|
775 count++;
|
Chris@23
|
776 }
|
Chris@23
|
777 }
|
matthiasm@43
|
778 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
779 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
780 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
781 chord_feature.label = "N";
|
mail@60
|
782 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
783
|
Chris@91
|
784 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
785 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
786 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
787 }
|
matthiasm@86
|
788
|
Chris@23
|
789 cerr << "done." << endl;
|
matthiasm@50
|
790
|
matthiasm@50
|
791 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
792 Feature chordchange_feature;
|
matthiasm@50
|
793 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
794 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
795 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
796 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
797 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
798 }
|
matthiasm@50
|
799
|
mail@60
|
800 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
801
|
matthiasm@50
|
802
|
Chris@23
|
803 return fsOut;
|
matthiasm@0
|
804 }
|