Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
30 const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@86
|
39 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@86
|
40
|
matthiasm@0
|
41 }
|
matthiasm@0
|
42
|
Chris@35
|
43 Chordino::~Chordino()
|
matthiasm@0
|
44 {
|
Chris@35
|
45 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
46 }
|
matthiasm@0
|
47
|
matthiasm@0
|
48 string
|
Chris@35
|
49 Chordino::getIdentifier() const
|
matthiasm@0
|
50 {
|
Chris@23
|
51 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
52 return "chordino";
|
matthiasm@0
|
53 }
|
matthiasm@0
|
54
|
matthiasm@0
|
55 string
|
Chris@35
|
56 Chordino::getName() const
|
matthiasm@0
|
57 {
|
Chris@23
|
58 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
59 return "Chordino";
|
matthiasm@0
|
60 }
|
matthiasm@0
|
61
|
matthiasm@0
|
62 string
|
Chris@35
|
63 Chordino::getDescription() const
|
matthiasm@0
|
64 {
|
Chris@23
|
65 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
67 }
|
matthiasm@0
|
68
|
matthiasm@50
|
69 Chordino::ParameterList
|
matthiasm@50
|
70 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
71 {
|
matthiasm@50
|
72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
73 ParameterList list;
|
matthiasm@50
|
74
|
matthiasm@50
|
75 ParameterDescriptor d;
|
matthiasm@50
|
76 d.identifier = "useNNLS";
|
matthiasm@50
|
77 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
78 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
79 d.unit = "";
|
matthiasm@50
|
80 d.minValue = 0.0;
|
matthiasm@50
|
81 d.maxValue = 1.0;
|
matthiasm@50
|
82 d.defaultValue = 1.0;
|
matthiasm@50
|
83 d.isQuantized = true;
|
matthiasm@50
|
84 d.quantizeStep = 1.0;
|
matthiasm@50
|
85 list.push_back(d);
|
matthiasm@50
|
86
|
matthiasm@50
|
87 ParameterDescriptor d4;
|
matthiasm@50
|
88 d4.identifier = "useHMM";
|
matthiasm@53
|
89 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
91 d4.unit = "";
|
matthiasm@50
|
92 d4.minValue = 0.0;
|
matthiasm@50
|
93 d4.maxValue = 1.0;
|
matthiasm@50
|
94 d4.defaultValue = 1.0;
|
matthiasm@50
|
95 d4.isQuantized = true;
|
matthiasm@50
|
96 d4.quantizeStep = 1.0;
|
matthiasm@50
|
97 list.push_back(d4);
|
matthiasm@50
|
98
|
matthiasm@50
|
99 ParameterDescriptor d0;
|
matthiasm@50
|
100 d0.identifier = "rollon";
|
matthiasm@50
|
101 d0.name = "spectral roll-on";
|
matthiasm@58
|
102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
103 d0.unit = "%";
|
matthiasm@50
|
104 d0.minValue = 0;
|
mail@76
|
105 d0.maxValue = 5;
|
matthiasm@92
|
106 d0.defaultValue = 0.0;
|
matthiasm@50
|
107 d0.isQuantized = true;
|
mail@76
|
108 d0.quantizeStep = 0.5;
|
matthiasm@50
|
109 list.push_back(d0);
|
matthiasm@50
|
110
|
matthiasm@50
|
111 ParameterDescriptor d1;
|
matthiasm@50
|
112 d1.identifier = "tuningmode";
|
matthiasm@50
|
113 d1.name = "tuning mode";
|
matthiasm@50
|
114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
115 d1.unit = "";
|
matthiasm@50
|
116 d1.minValue = 0;
|
matthiasm@50
|
117 d1.maxValue = 1;
|
matthiasm@92
|
118 d1.defaultValue = 0.0;
|
matthiasm@50
|
119 d1.isQuantized = true;
|
matthiasm@50
|
120 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
121 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
122 d1.quantizeStep = 1.0;
|
matthiasm@50
|
123 list.push_back(d1);
|
matthiasm@50
|
124
|
matthiasm@50
|
125 ParameterDescriptor d2;
|
matthiasm@50
|
126 d2.identifier = "whitening";
|
matthiasm@50
|
127 d2.name = "spectral whitening";
|
matthiasm@50
|
128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
129 d2.unit = "";
|
matthiasm@50
|
130 d2.isQuantized = true;
|
matthiasm@50
|
131 d2.minValue = 0.0;
|
matthiasm@50
|
132 d2.maxValue = 1.0;
|
matthiasm@50
|
133 d2.defaultValue = 1.0;
|
matthiasm@50
|
134 d2.isQuantized = false;
|
matthiasm@50
|
135 list.push_back(d2);
|
matthiasm@50
|
136
|
matthiasm@50
|
137 ParameterDescriptor d3;
|
matthiasm@50
|
138 d3.identifier = "s";
|
matthiasm@50
|
139 d3.name = "spectral shape";
|
matthiasm@50
|
140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
141 d3.unit = "";
|
matthiasm@50
|
142 d3.minValue = 0.5;
|
matthiasm@50
|
143 d3.maxValue = 0.9;
|
matthiasm@50
|
144 d3.defaultValue = 0.7;
|
matthiasm@50
|
145 d3.isQuantized = false;
|
matthiasm@50
|
146 list.push_back(d3);
|
matthiasm@50
|
147
|
mail@89
|
148 ParameterDescriptor boostn;
|
mail@89
|
149 boostn.identifier = "boostn";
|
mail@89
|
150 boostn.name = "boost N";
|
matthiasm@95
|
151 boostn.description = "Boost likelihood of the N (no chord) label.";
|
mail@89
|
152 boostn.unit = "";
|
matthiasm@95
|
153 boostn.minValue = 0.0;
|
matthiasm@95
|
154 boostn.maxValue = 1.0;
|
matthiasm@95
|
155 boostn.defaultValue = 0.1;
|
mail@89
|
156 boostn.isQuantized = false;
|
mail@89
|
157 list.push_back(boostn);
|
matthiasm@50
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
matthiasm@0
|
170 OutputDescriptor d7;
|
matthiasm@0
|
171 d7.identifier = "simplechord";
|
Chris@36
|
172 d7.name = "Chord Estimate";
|
matthiasm@58
|
173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
174 d7.unit = "";
|
matthiasm@0
|
175 d7.hasFixedBinCount = true;
|
matthiasm@0
|
176 d7.binCount = 0;
|
matthiasm@0
|
177 d7.hasKnownExtents = false;
|
matthiasm@0
|
178 d7.isQuantized = false;
|
matthiasm@0
|
179 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
180 d7.hasDuration = false;
|
matthiasm@0
|
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
182 list.push_back(d7);
|
Chris@35
|
183 m_outputChords = index++;
|
matthiasm@0
|
184
|
matthiasm@86
|
185 OutputDescriptor chordnotes;
|
matthiasm@86
|
186 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
187 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
189 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
190 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
191 chordnotes.binCount = 1;
|
matthiasm@86
|
192 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
193 chordnotes.minValue = 0;
|
matthiasm@86
|
194 chordnotes.maxValue = 127;
|
matthiasm@86
|
195 chordnotes.isQuantized = true;
|
matthiasm@86
|
196 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
198 chordnotes.hasDuration = true;
|
matthiasm@86
|
199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
200 list.push_back(chordnotes);
|
matthiasm@86
|
201 m_outputChordnotes = index++;
|
matthiasm@86
|
202
|
Chris@23
|
203 OutputDescriptor d8;
|
mail@60
|
204 d8.identifier = "harmonicchange";
|
Chris@36
|
205 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
207 d8.unit = "";
|
matthiasm@17
|
208 d8.hasFixedBinCount = true;
|
matthiasm@17
|
209 d8.binCount = 1;
|
mail@60
|
210 d8.hasKnownExtents = false;
|
mail@60
|
211 // d8.minValue = 0.0;
|
mail@60
|
212 // d8.maxValue = 0.999;
|
matthiasm@17
|
213 d8.isQuantized = false;
|
matthiasm@17
|
214 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
215 d8.hasDuration = false;
|
matthiasm@17
|
216 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
217 list.push_back(d8);
|
Chris@35
|
218 m_outputHarmonicChange = index++;
|
matthiasm@1
|
219
|
matthiasm@107
|
220 OutputDescriptor loglikelihood;
|
matthiasm@107
|
221 loglikelihood.identifier = "loglikelihood";
|
matthiasm@107
|
222 loglikelihood.name = "chord estimate log-likelihood";
|
matthiasm@107
|
223 loglikelihood.description = ".";
|
matthiasm@107
|
224 loglikelihood.unit = "";
|
matthiasm@107
|
225 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
226 loglikelihood.binCount = 1;
|
matthiasm@107
|
227 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
228 loglikelihood.isQuantized = false;
|
matthiasm@107
|
229 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
230 loglikelihood.hasDuration = false;
|
matthiasm@107
|
231 // loglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@107
|
232 list.push_back(loglikelihood);
|
matthiasm@107
|
233 m_outputLoglikelihood = index++;
|
matthiasm@106
|
234
|
matthiasm@0
|
235 return list;
|
matthiasm@0
|
236 }
|
matthiasm@0
|
237
|
matthiasm@0
|
238 bool
|
Chris@35
|
239 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
240 {
|
Chris@23
|
241 if (debug_on) {
|
Chris@23
|
242 cerr << "--> initialise";
|
Chris@23
|
243 }
|
mail@76
|
244
|
Chris@35
|
245 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
246 return false;
|
Chris@35
|
247 }
|
mail@89
|
248 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN);
|
matthiasm@0
|
249 return true;
|
matthiasm@0
|
250 }
|
matthiasm@0
|
251
|
matthiasm@0
|
252 void
|
Chris@35
|
253 Chordino::reset()
|
matthiasm@0
|
254 {
|
Chris@23
|
255 if (debug_on) cerr << "--> reset";
|
Chris@35
|
256 NNLSBase::reset();
|
matthiasm@0
|
257 }
|
matthiasm@0
|
258
|
Chris@35
|
259 Chordino::FeatureSet
|
Chris@35
|
260 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
261 {
|
Chris@23
|
262 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
263
|
Chris@35
|
264 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
265
|
Chris@35
|
266 return FeatureSet();
|
matthiasm@0
|
267 }
|
matthiasm@0
|
268
|
Chris@35
|
269 Chordino::FeatureSet
|
Chris@35
|
270 Chordino::getRemainingFeatures()
|
matthiasm@0
|
271 {
|
mail@89
|
272 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
273 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
274 FeatureSet fsOut;
|
Chris@35
|
275 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
276 int nChord = m_chordnames.size();
|
Chris@23
|
277 //
|
Chris@23
|
278 /** Calculate Tuning
|
Chris@23
|
279 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
280 cumulative mean real and imag values)
|
Chris@23
|
281 **/
|
mail@80
|
282 float meanTuningImag = 0;
|
mail@80
|
283 float meanTuningReal = 0;
|
mail@80
|
284 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
285 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
286 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
287 }
|
Chris@23
|
288 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
289 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
290 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
291 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
292
|
Chris@23
|
293 char buffer0 [50];
|
matthiasm@1
|
294
|
Chris@23
|
295 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
296
|
matthiasm@1
|
297
|
Chris@23
|
298 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
299 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
300 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
301 **/
|
Chris@35
|
302 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
303
|
Chris@23
|
304 float tempValue = 0;
|
Chris@23
|
305 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
306 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
307 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
308 int count = 0;
|
matthiasm@1
|
309
|
Chris@35
|
310 FeatureList tunedSpec;
|
matthiasm@43
|
311 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
312
|
matthiasm@43
|
313 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
314
|
Chris@35
|
315 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
316 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
317 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
318 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
319 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
320 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
321 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
322
|
Chris@23
|
323 if (m_tuneLocal) {
|
Chris@23
|
324 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
325 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
326 }
|
matthiasm@1
|
327
|
mail@80
|
328 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
329
|
Chris@91
|
330 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@91
|
331 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
332 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
333 }
|
matthiasm@1
|
334
|
matthiasm@43
|
335 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
336 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
337 vector<float> runningstd;
|
mail@77
|
338 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
339 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
340 }
|
Chris@23
|
341 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
342 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
343 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
344 if (runningstd[i] > 0) {
|
matthiasm@43
|
345 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
346 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
347 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
348 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
349 }
|
matthiasm@43
|
350 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
351 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
352 }
|
Chris@23
|
353 }
|
matthiasm@43
|
354 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
355 count++;
|
Chris@23
|
356 }
|
Chris@23
|
357 cerr << "done." << endl;
|
matthiasm@1
|
358
|
Chris@23
|
359 /** Semitone spectrum and chromagrams
|
Chris@23
|
360 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
361 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
362 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
363 bass and treble stacked onto each other).
|
Chris@23
|
364 **/
|
matthiasm@42
|
365 if (m_useNNLS == 0) {
|
Chris@35
|
366 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
367 } else {
|
Chris@35
|
368 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
369 }
|
matthiasm@13
|
370
|
matthiasm@1
|
371
|
matthiasm@43
|
372 vector<vector<double> > chordogram;
|
Chris@23
|
373 vector<vector<int> > scoreChordogram;
|
Chris@35
|
374 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
375 count = 0;
|
matthiasm@9
|
376
|
Chris@35
|
377 FeatureList chromaList;
|
matthiasm@43
|
378
|
matthiasm@43
|
379
|
Chris@35
|
380
|
Chris@35
|
381 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
382 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
383 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
384
|
matthiasm@43
|
385 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
386 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
387
|
mail@77
|
388 float b[nNote];
|
matthiasm@1
|
389
|
Chris@23
|
390 bool some_b_greater_zero = false;
|
Chris@23
|
391 float sumb = 0;
|
mail@77
|
392 for (int i = 0; i < nNote; i++) {
|
mail@77
|
393 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
394 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
395 sumb += b[i];
|
Chris@23
|
396 if (b[i] > 0) {
|
Chris@23
|
397 some_b_greater_zero = true;
|
Chris@23
|
398 }
|
Chris@23
|
399 }
|
matthiasm@1
|
400
|
Chris@23
|
401 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
402
|
Chris@23
|
403 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
404 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
405 float currval;
|
Chris@91
|
406 int iSemitone = 0;
|
matthiasm@1
|
407
|
Chris@23
|
408 if (some_b_greater_zero) {
|
matthiasm@42
|
409 if (m_useNNLS == 0) {
|
Chris@91
|
410 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
411 currval = 0;
|
mail@81
|
412 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
413 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
414 }
|
Chris@23
|
415 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
416 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
417 iSemitone++;
|
Chris@23
|
418 }
|
matthiasm@1
|
419
|
Chris@23
|
420 } else {
|
Chris@35
|
421 float x[84+1000];
|
Chris@23
|
422 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
423 vector<int> signifIndex;
|
Chris@23
|
424 int index=0;
|
Chris@23
|
425 sumb /= 84.0;
|
Chris@91
|
426 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
427 float currval = 0;
|
mail@81
|
428 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
429 currval += b[iNote + iBPS];
|
mail@81
|
430 }
|
Chris@23
|
431 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
432 index++;
|
Chris@23
|
433 }
|
Chris@35
|
434 float rnorm;
|
Chris@35
|
435 float w[84+1000];
|
Chris@35
|
436 float zz[84+1000];
|
Chris@23
|
437 int indx[84+1000];
|
Chris@23
|
438 int mode;
|
mail@77
|
439 int dictsize = nNote*signifIndex.size();
|
mail@81
|
440 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
441 float *curr_dict = new float[dictsize];
|
Chris@91
|
442 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
443 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
444 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
445 }
|
Chris@23
|
446 }
|
Chris@35
|
447 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
448 delete [] curr_dict;
|
Chris@91
|
449 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
450 // cerr << mode << endl;
|
Chris@23
|
451 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
452 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
453 }
|
Chris@23
|
454 }
|
Chris@23
|
455 }
|
Chris@35
|
456
|
Chris@35
|
457 vector<float> origchroma = chroma;
|
Chris@23
|
458 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
459 currentChromas.values = chroma;
|
Chris@35
|
460
|
Chris@23
|
461 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
462 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
463 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
464 case 0: // should never end up here
|
Chris@23
|
465 break;
|
Chris@23
|
466 case 1:
|
Chris@35
|
467 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
468 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
469 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
470 break;
|
Chris@23
|
471 case 2:
|
Chris@35
|
472 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
473 chromanorm[2] += *it;
|
Chris@23
|
474 }
|
Chris@23
|
475 break;
|
Chris@23
|
476 case 3:
|
Chris@35
|
477 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
478 chromanorm[2] += pow(*it,2);
|
Chris@23
|
479 }
|
Chris@23
|
480 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
481 break;
|
Chris@23
|
482 }
|
Chris@23
|
483 if (chromanorm[2] > 0) {
|
Chris@91
|
484 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
485 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
486 }
|
Chris@23
|
487 }
|
Chris@23
|
488 }
|
Chris@35
|
489
|
matthiasm@43
|
490 chromaList.push_back(currentChromas);
|
Chris@35
|
491
|
Chris@23
|
492 // local chord estimation
|
matthiasm@43
|
493 vector<double> currentChordSalience;
|
matthiasm@43
|
494 double tempchordvalue = 0;
|
matthiasm@43
|
495 double sumchordvalue = 0;
|
matthiasm@9
|
496
|
Chris@23
|
497 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
498 tempchordvalue = 0;
|
Chris@23
|
499 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
501 }
|
Chris@23
|
502 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
503 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
504 }
|
matthiasm@48
|
505 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
506 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
507 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
508 sumchordvalue+=tempchordvalue;
|
Chris@23
|
509 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
510 }
|
Chris@23
|
511 if (sumchordvalue > 0) {
|
Chris@23
|
512 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
513 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
514 }
|
Chris@23
|
515 } else {
|
Chris@23
|
516 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
517 }
|
Chris@23
|
518 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
519
|
Chris@23
|
520 count++;
|
Chris@23
|
521 }
|
Chris@23
|
522 cerr << "done." << endl;
|
matthiasm@13
|
523
|
matthiasm@86
|
524 vector<Feature> oldnotes;
|
matthiasm@10
|
525
|
matthiasm@50
|
526 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
527 if (m_useHMM == 1.0) {
|
matthiasm@44
|
528 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
529 int oldchord = nChord-1;
|
matthiasm@48
|
530 double selftransprob = 0.99;
|
matthiasm@43
|
531
|
matthiasm@48
|
532 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
533 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
534
|
matthiasm@50
|
535 double *delta;
|
matthiasm@50
|
536 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
537
|
matthiasm@43
|
538 vector<vector<double> > trans;
|
matthiasm@43
|
539 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
540 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
541 temp[iChord] = selftransprob;
|
matthiasm@43
|
542 trans.push_back(temp);
|
matthiasm@43
|
543 }
|
matthiasm@106
|
544 vector<double> scale;
|
matthiasm@106
|
545 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@106
|
546
|
matthiasm@48
|
547
|
matthiasm@48
|
548 Feature chord_feature; // chord estimate
|
matthiasm@48
|
549 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
550 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
551 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
552 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
553
|
mail@60
|
554 chordchange[0] = 0;
|
Chris@91
|
555 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
556 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
557 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
558 // chord
|
matthiasm@43
|
559 Feature chord_feature; // chord estimate
|
matthiasm@43
|
560 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
561 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
562 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
563 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
564 oldchord = chordpath[iFrame];
|
matthiasm@86
|
565 // chord notes
|
Chris@91
|
566 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
567 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
568 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
569 }
|
matthiasm@86
|
570 oldnotes.clear();
|
Chris@91
|
571 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
572 Feature chordnote_feature;
|
matthiasm@86
|
573 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
574 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
575 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
576 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
577 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
578 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
579 }
|
Chris@23
|
580 }
|
matthiasm@50
|
581 /* calculating simple chord change prob */
|
matthiasm@50
|
582 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
583 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
584 }
|
Chris@23
|
585 }
|
matthiasm@43
|
586
|
matthiasm@106
|
587 float logscale = 0;
|
matthiasm@106
|
588 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@106
|
589 logscale -= log(scale[iFrame]);
|
matthiasm@106
|
590 Feature loglikelihood;
|
matthiasm@106
|
591 loglikelihood.hasTimestamp = true;
|
matthiasm@106
|
592 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@106
|
593 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@106
|
594 // cerr << chordchange[iFrame] << endl;
|
matthiasm@107
|
595 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@106
|
596 }
|
matthiasm@106
|
597 logscale /= nFrame;
|
mail@111
|
598 // cerr << "loglik" << logscale << endl;
|
matthiasm@106
|
599
|
matthiasm@106
|
600
|
matthiasm@43
|
601 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
602 } else {
|
matthiasm@43
|
603 /* Simple chord estimation
|
matthiasm@43
|
604 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
605 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
606 */
|
matthiasm@44
|
607 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
608 count = 0;
|
matthiasm@43
|
609 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
610 vector<int> chordSequence;
|
matthiasm@43
|
611 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
612 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
613 scoreChordogram.push_back(temp);
|
matthiasm@43
|
614 }
|
matthiasm@43
|
615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
616 int startIndex = count + 1;
|
matthiasm@43
|
617 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
618
|
matthiasm@43
|
619 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
620
|
matthiasm@43
|
621 vector<int> chordCandidates;
|
Chris@91
|
622 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
623 // float currsum = 0;
|
Chris@91
|
624 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
625 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
626 // }
|
matthiasm@43
|
627 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
628 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
629 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
630 chordCandidates.push_back(iChord);
|
matthiasm@43
|
631 break;
|
matthiasm@43
|
632 }
|
Chris@23
|
633 }
|
Chris@23
|
634 }
|
matthiasm@43
|
635 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
636 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
637
|
matthiasm@43
|
638 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
639 float maxindex = 0; //... and the index thereof
|
Chris@91
|
640 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
641 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
642
|
matthiasm@43
|
643 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
644 // now find the max values on both sides of iWF
|
matthiasm@43
|
645 // left side:
|
matthiasm@43
|
646 float maxL = 0;
|
Chris@91
|
647 int maxindL = nChord-1;
|
Chris@91
|
648 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
649 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
650 float currsum = 0;
|
Chris@91
|
651 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
652 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
653 }
|
matthiasm@43
|
654 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
655 if (currsum > maxL) {
|
matthiasm@43
|
656 maxL = currsum;
|
matthiasm@43
|
657 maxindL = iChord;
|
matthiasm@43
|
658 }
|
matthiasm@43
|
659 }
|
matthiasm@43
|
660 // right side:
|
matthiasm@43
|
661 float maxR = 0;
|
Chris@91
|
662 int maxindR = nChord-1;
|
Chris@91
|
663 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
664 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
665 float currsum = 0;
|
Chris@91
|
666 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
667 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
668 }
|
matthiasm@43
|
669 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
670 if (currsum > maxR) {
|
matthiasm@43
|
671 maxR = currsum;
|
matthiasm@43
|
672 maxindR = iChord;
|
matthiasm@43
|
673 }
|
matthiasm@43
|
674 }
|
matthiasm@43
|
675 if (maxL+maxR > maxval) {
|
matthiasm@43
|
676 maxval = maxL+maxR;
|
matthiasm@43
|
677 maxindex = iWF;
|
matthiasm@43
|
678 bestchordL = maxindL;
|
matthiasm@43
|
679 bestchordR = maxindR;
|
matthiasm@43
|
680 }
|
matthiasm@43
|
681
|
Chris@23
|
682 }
|
matthiasm@43
|
683 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
684 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
685 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
686 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
687 }
|
Chris@91
|
688 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
689 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
690 }
|
matthiasm@50
|
691 if (bestchordL != bestchordR) {
|
matthiasm@50
|
692 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
693 }
|
matthiasm@43
|
694 count++;
|
Chris@23
|
695 }
|
matthiasm@43
|
696 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
697 count = 0;
|
matthiasm@43
|
698 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
699 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
700 float maxindex = 0; //... and the index thereof
|
Chris@91
|
701 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
702 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
703 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
704 maxindex = iChord;
|
matthiasm@43
|
705 // cerr << iChord << endl;
|
matthiasm@43
|
706 }
|
matthiasm@43
|
707 }
|
matthiasm@43
|
708 chordSequence.push_back(maxindex);
|
matthiasm@43
|
709 count++;
|
Chris@23
|
710 }
|
matthiasm@43
|
711
|
matthiasm@43
|
712
|
matthiasm@43
|
713 // mode filter on chordSequence
|
matthiasm@43
|
714 count = 0;
|
matthiasm@43
|
715 string oldChord = "";
|
matthiasm@43
|
716 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
717 Feature chord_feature; // chord estimate
|
matthiasm@43
|
718 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
719 chord_feature.timestamp = *it;
|
matthiasm@43
|
720 // Feature currentChord; // chord estimate
|
matthiasm@43
|
721 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
722 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
723
|
matthiasm@43
|
724 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
725 int maxChordCount = 0;
|
matthiasm@43
|
726 int maxChordIndex = nChord-1;
|
matthiasm@43
|
727 string maxChord;
|
matthiasm@43
|
728 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
729 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
730 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
731 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
732 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
733 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
734 maxChordCount++;
|
matthiasm@43
|
735 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
736 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
737 }
|
matthiasm@43
|
738 }
|
matthiasm@43
|
739 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
740 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
741 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
742 if (oldChord != maxChord) {
|
matthiasm@43
|
743 oldChord = maxChord;
|
matthiasm@43
|
744 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
745 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
746 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
747 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
748 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
749 }
|
matthiasm@86
|
750 oldnotes.clear();
|
Chris@91
|
751 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
752 Feature chordnote_feature;
|
matthiasm@86
|
753 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
754 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
755 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
756 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
757 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
758 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
759 }
|
matthiasm@43
|
760 }
|
matthiasm@43
|
761 count++;
|
Chris@23
|
762 }
|
Chris@23
|
763 }
|
matthiasm@43
|
764 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
765 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
766 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
767 chord_feature.label = "N";
|
mail@60
|
768 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
769
|
Chris@91
|
770 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
771 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
772 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
773 }
|
matthiasm@86
|
774
|
Chris@23
|
775 cerr << "done." << endl;
|
matthiasm@50
|
776
|
matthiasm@50
|
777 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
778 Feature chordchange_feature;
|
matthiasm@50
|
779 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
780 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
781 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
782 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
783 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
784 }
|
matthiasm@50
|
785
|
mail@60
|
786 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
787
|
matthiasm@50
|
788
|
Chris@23
|
789 return fsOut;
|
matthiasm@0
|
790 }
|