Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
30 const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@27
|
32 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
33
|
Chris@35
|
34 Chordino::Chordino(float inputSampleRate) :
|
Chris@35
|
35 NNLSBase(inputSampleRate)
|
matthiasm@0
|
36 {
|
Chris@35
|
37 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
38 }
|
matthiasm@0
|
39
|
Chris@35
|
40 Chordino::~Chordino()
|
matthiasm@0
|
41 {
|
Chris@35
|
42 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
43 }
|
matthiasm@0
|
44
|
matthiasm@0
|
45 string
|
Chris@35
|
46 Chordino::getIdentifier() const
|
matthiasm@0
|
47 {
|
Chris@23
|
48 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
49 return "chordino";
|
matthiasm@0
|
50 }
|
matthiasm@0
|
51
|
matthiasm@0
|
52 string
|
Chris@35
|
53 Chordino::getName() const
|
matthiasm@0
|
54 {
|
Chris@23
|
55 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
56 return "Chordino";
|
matthiasm@0
|
57 }
|
matthiasm@0
|
58
|
matthiasm@0
|
59 string
|
Chris@35
|
60 Chordino::getDescription() const
|
matthiasm@0
|
61 {
|
Chris@23
|
62 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@13
|
63 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
|
matthiasm@0
|
64 }
|
matthiasm@0
|
65
|
Chris@35
|
66 Chordino::OutputList
|
Chris@35
|
67 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
68 {
|
Chris@23
|
69 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
70 OutputList list;
|
matthiasm@0
|
71
|
Chris@35
|
72 int index = 0;
|
matthiasm@0
|
73
|
matthiasm@0
|
74 OutputDescriptor d7;
|
matthiasm@0
|
75 d7.identifier = "simplechord";
|
Chris@36
|
76 d7.name = "Chord Estimate";
|
matthiasm@0
|
77 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
78 d7.unit = "";
|
matthiasm@0
|
79 d7.hasFixedBinCount = true;
|
matthiasm@0
|
80 d7.binCount = 0;
|
matthiasm@0
|
81 d7.hasKnownExtents = false;
|
matthiasm@0
|
82 d7.isQuantized = false;
|
matthiasm@0
|
83 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
84 d7.hasDuration = false;
|
matthiasm@0
|
85 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
86 list.push_back(d7);
|
Chris@35
|
87 m_outputChords = index++;
|
matthiasm@0
|
88
|
Chris@23
|
89 OutputDescriptor d8;
|
matthiasm@17
|
90 d8.identifier = "harmonicchange";
|
Chris@36
|
91 d8.name = "Harmonic Change Value";
|
matthiasm@17
|
92 d8.description = "Harmonic change.";
|
matthiasm@17
|
93 d8.unit = "";
|
matthiasm@17
|
94 d8.hasFixedBinCount = true;
|
matthiasm@17
|
95 d8.binCount = 1;
|
matthiasm@17
|
96 d8.hasKnownExtents = true;
|
Chris@23
|
97 d8.minValue = 0.0;
|
Chris@23
|
98 d8.maxValue = 0.999;
|
matthiasm@17
|
99 d8.isQuantized = false;
|
matthiasm@17
|
100 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
101 d8.hasDuration = false;
|
matthiasm@17
|
102 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
103 list.push_back(d8);
|
Chris@35
|
104 m_outputHarmonicChange = index++;
|
matthiasm@1
|
105
|
matthiasm@0
|
106 return list;
|
matthiasm@0
|
107 }
|
matthiasm@0
|
108
|
matthiasm@0
|
109 bool
|
Chris@35
|
110 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
111 {
|
Chris@23
|
112 if (debug_on) {
|
Chris@23
|
113 cerr << "--> initialise";
|
Chris@23
|
114 }
|
matthiasm@1
|
115
|
Chris@35
|
116 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
117 return false;
|
Chris@35
|
118 }
|
matthiasm@1
|
119
|
matthiasm@0
|
120 return true;
|
matthiasm@0
|
121 }
|
matthiasm@0
|
122
|
matthiasm@0
|
123 void
|
Chris@35
|
124 Chordino::reset()
|
matthiasm@0
|
125 {
|
Chris@23
|
126 if (debug_on) cerr << "--> reset";
|
Chris@35
|
127 NNLSBase::reset();
|
matthiasm@0
|
128 }
|
matthiasm@0
|
129
|
Chris@35
|
130 Chordino::FeatureSet
|
Chris@35
|
131 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
132 {
|
Chris@23
|
133 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
134
|
Chris@35
|
135 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
136
|
Chris@35
|
137 return FeatureSet();
|
matthiasm@0
|
138 }
|
matthiasm@0
|
139
|
Chris@35
|
140 Chordino::FeatureSet
|
Chris@35
|
141 Chordino::getRemainingFeatures()
|
matthiasm@0
|
142 {
|
Chris@23
|
143 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
144 FeatureSet fsOut;
|
Chris@35
|
145 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
146 int nChord = m_chordnames.size();
|
Chris@23
|
147 //
|
Chris@23
|
148 /** Calculate Tuning
|
Chris@23
|
149 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
150 cumulative mean real and imag values)
|
Chris@23
|
151 **/
|
Chris@23
|
152 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
153 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
154 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
155 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
156 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
157 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
158
|
Chris@23
|
159 char buffer0 [50];
|
matthiasm@1
|
160
|
Chris@23
|
161 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
162
|
matthiasm@1
|
163
|
Chris@23
|
164 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
165 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
matthiasm@43
|
166 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
|
Chris@23
|
167 **/
|
Chris@35
|
168 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
169
|
Chris@23
|
170 float tempValue = 0;
|
Chris@23
|
171 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
172 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
173 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
174 int count = 0;
|
matthiasm@1
|
175
|
Chris@35
|
176 FeatureList tunedSpec;
|
matthiasm@43
|
177 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
178
|
matthiasm@43
|
179 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
180
|
Chris@35
|
181 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
matthiasm@43
|
182 Feature currentLogSpectum = *i;
|
matthiasm@43
|
183 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
184 currentTunedSpec.hasTimestamp = true;
|
matthiasm@43
|
185 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
|
matthiasm@43
|
186 timestamps.push_back(currentLogSpectum.timestamp);
|
matthiasm@43
|
187 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
188
|
Chris@23
|
189 if (m_tuneLocal) {
|
Chris@23
|
190 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
191 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
192 }
|
matthiasm@1
|
193
|
Chris@23
|
194 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
195
|
matthiasm@43
|
196 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@43
|
197 tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor;
|
matthiasm@43
|
198 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
199 }
|
matthiasm@1
|
200
|
matthiasm@43
|
201 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
202 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
203 vector<float> runningstd;
|
Chris@23
|
204 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
205 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
206 }
|
Chris@23
|
207 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
208 for (int i = 0; i < 256; i++) {
|
Chris@23
|
209 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
210 if (runningstd[i] > 0) {
|
matthiasm@43
|
211 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
212 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
213 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
214 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
215 }
|
matthiasm@43
|
216 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
217 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
218 }
|
Chris@23
|
219 }
|
matthiasm@43
|
220 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
221 count++;
|
Chris@23
|
222 }
|
Chris@23
|
223 cerr << "done." << endl;
|
matthiasm@1
|
224
|
Chris@23
|
225 /** Semitone spectrum and chromagrams
|
Chris@23
|
226 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
227 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
228 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
229 bass and treble stacked onto each other).
|
Chris@23
|
230 **/
|
matthiasm@42
|
231 if (m_useNNLS == 0) {
|
Chris@35
|
232 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
233 } else {
|
Chris@35
|
234 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
235 }
|
matthiasm@13
|
236
|
matthiasm@1
|
237
|
matthiasm@43
|
238 vector<vector<double> > chordogram;
|
Chris@23
|
239 vector<vector<int> > scoreChordogram;
|
Chris@35
|
240 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
241 count = 0;
|
matthiasm@9
|
242
|
Chris@35
|
243 FeatureList chromaList;
|
matthiasm@43
|
244
|
matthiasm@43
|
245
|
Chris@35
|
246
|
Chris@35
|
247 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
248 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
249 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
250
|
matthiasm@43
|
251 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
252 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
253
|
Chris@35
|
254 float b[256];
|
matthiasm@1
|
255
|
Chris@23
|
256 bool some_b_greater_zero = false;
|
Chris@23
|
257 float sumb = 0;
|
Chris@23
|
258 for (int i = 0; i < 256; i++) {
|
Chris@23
|
259 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@43
|
260 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
261 sumb += b[i];
|
Chris@23
|
262 if (b[i] > 0) {
|
Chris@23
|
263 some_b_greater_zero = true;
|
Chris@23
|
264 }
|
Chris@23
|
265 }
|
matthiasm@1
|
266
|
Chris@23
|
267 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
268
|
Chris@23
|
269 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
270 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
271 float currval;
|
Chris@23
|
272 unsigned iSemitone = 0;
|
matthiasm@1
|
273
|
Chris@23
|
274 if (some_b_greater_zero) {
|
matthiasm@42
|
275 if (m_useNNLS == 0) {
|
Chris@23
|
276 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
277 currval = 0;
|
Chris@35
|
278 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@35
|
279 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@35
|
280 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
281 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
282 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
283 iSemitone++;
|
Chris@23
|
284 }
|
matthiasm@1
|
285
|
Chris@23
|
286 } else {
|
Chris@35
|
287 float x[84+1000];
|
Chris@23
|
288 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
289 vector<int> signifIndex;
|
Chris@23
|
290 int index=0;
|
Chris@23
|
291 sumb /= 84.0;
|
Chris@23
|
292 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
293 float currval = 0;
|
Chris@23
|
294 currval += b[iNote + 1 + -1];
|
Chris@23
|
295 currval += b[iNote + 1 + 0];
|
Chris@23
|
296 currval += b[iNote + 1 + 1];
|
Chris@23
|
297 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
298 index++;
|
Chris@23
|
299 }
|
Chris@35
|
300 float rnorm;
|
Chris@35
|
301 float w[84+1000];
|
Chris@35
|
302 float zz[84+1000];
|
Chris@23
|
303 int indx[84+1000];
|
Chris@23
|
304 int mode;
|
Chris@23
|
305 int dictsize = 256*signifIndex.size();
|
Chris@35
|
306 float *curr_dict = new float[dictsize];
|
Chris@23
|
307 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
308 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
309 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
310 }
|
Chris@23
|
311 }
|
Chris@35
|
312 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
313 delete [] curr_dict;
|
Chris@23
|
314 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
315 // cerr << mode << endl;
|
Chris@23
|
316 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
317 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
318 }
|
Chris@23
|
319 }
|
Chris@23
|
320 }
|
Chris@35
|
321
|
Chris@35
|
322 vector<float> origchroma = chroma;
|
Chris@23
|
323 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
324 currentChromas.values = chroma;
|
Chris@35
|
325
|
Chris@23
|
326 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
327 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
328 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
329 case 0: // should never end up here
|
Chris@23
|
330 break;
|
Chris@23
|
331 case 1:
|
Chris@35
|
332 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
333 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
334 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
335 break;
|
Chris@23
|
336 case 2:
|
Chris@35
|
337 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
338 chromanorm[2] += *it;
|
Chris@23
|
339 }
|
Chris@23
|
340 break;
|
Chris@23
|
341 case 3:
|
Chris@35
|
342 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
343 chromanorm[2] += pow(*it,2);
|
Chris@23
|
344 }
|
Chris@23
|
345 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
346 break;
|
Chris@23
|
347 }
|
Chris@23
|
348 if (chromanorm[2] > 0) {
|
Chris@35
|
349 for (int i = 0; i < chroma.size(); i++) {
|
matthiasm@43
|
350 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
351 }
|
Chris@23
|
352 }
|
Chris@23
|
353 }
|
Chris@35
|
354
|
matthiasm@43
|
355 chromaList.push_back(currentChromas);
|
Chris@35
|
356
|
Chris@23
|
357 // local chord estimation
|
matthiasm@43
|
358 vector<double> currentChordSalience;
|
matthiasm@43
|
359 double tempchordvalue = 0;
|
matthiasm@43
|
360 double sumchordvalue = 0;
|
matthiasm@9
|
361
|
Chris@23
|
362 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
363 tempchordvalue = 0;
|
Chris@23
|
364 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
365 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
366 }
|
Chris@23
|
367 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
368 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
369 }
|
matthiasm@44
|
370 if (tempchordvalue < 0) tempchordvalue = 0;
|
Chris@23
|
371 sumchordvalue+=tempchordvalue;
|
Chris@23
|
372 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
373 }
|
Chris@23
|
374 if (sumchordvalue > 0) {
|
Chris@23
|
375 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
376 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
377 }
|
Chris@23
|
378 } else {
|
Chris@23
|
379 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
380 }
|
Chris@23
|
381 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
382
|
Chris@23
|
383 count++;
|
Chris@23
|
384 }
|
Chris@23
|
385 cerr << "done." << endl;
|
matthiasm@13
|
386
|
matthiasm@10
|
387
|
matthiasm@43
|
388 bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@43
|
389 if (m_useHMM) {
|
matthiasm@44
|
390 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
391 int oldchord = nChord-1;
|
matthiasm@44
|
392 double selftransprob = 0.9;
|
matthiasm@43
|
393
|
matthiasm@43
|
394 vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@43
|
395 vector<vector<double> > trans;
|
matthiasm@43
|
396 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
397 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
398 temp[iChord] = selftransprob;
|
matthiasm@43
|
399 trans.push_back(temp);
|
matthiasm@43
|
400 }
|
matthiasm@43
|
401 vector<int> chordpath = ViterbiPath(init,trans,chordogram);
|
matthiasm@43
|
402
|
matthiasm@43
|
403 for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) {
|
matthiasm@43
|
404 // cerr << chordpath[iFrame] << endl;
|
matthiasm@43
|
405 if (chordpath[iFrame] != oldchord) {
|
matthiasm@43
|
406 Feature chord_feature; // chord estimate
|
matthiasm@43
|
407 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
408 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
409 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
matthiasm@43
|
410 fsOut[0].push_back(chord_feature);
|
matthiasm@43
|
411 oldchord = chordpath[iFrame];
|
Chris@23
|
412 }
|
Chris@23
|
413 }
|
matthiasm@43
|
414
|
matthiasm@43
|
415 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
416 } else {
|
matthiasm@43
|
417 /* Simple chord estimation
|
matthiasm@43
|
418 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
419 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
420 */
|
matthiasm@44
|
421 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
422 count = 0;
|
matthiasm@43
|
423 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
424 vector<int> chordSequence;
|
matthiasm@43
|
425 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
426 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
427 scoreChordogram.push_back(temp);
|
matthiasm@43
|
428 }
|
matthiasm@43
|
429 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
430 int startIndex = count + 1;
|
matthiasm@43
|
431 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
432
|
matthiasm@43
|
433 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
434
|
matthiasm@43
|
435 vector<int> chordCandidates;
|
matthiasm@43
|
436 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@43
|
437 // float currsum = 0;
|
matthiasm@43
|
438 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
439 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
440 // }
|
matthiasm@43
|
441 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@43
|
442 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
443 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
444 chordCandidates.push_back(iChord);
|
matthiasm@43
|
445 break;
|
matthiasm@43
|
446 }
|
Chris@23
|
447 }
|
Chris@23
|
448 }
|
matthiasm@43
|
449 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
450 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
451
|
matthiasm@43
|
452 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
453 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
454 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@43
|
455 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
456
|
matthiasm@43
|
457 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
458 // now find the max values on both sides of iWF
|
matthiasm@43
|
459 // left side:
|
matthiasm@43
|
460 float maxL = 0;
|
matthiasm@43
|
461 unsigned maxindL = nChord-1;
|
matthiasm@43
|
462 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
463 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
464 float currsum = 0;
|
matthiasm@43
|
465 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
466 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
467 }
|
matthiasm@43
|
468 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
469 if (currsum > maxL) {
|
matthiasm@43
|
470 maxL = currsum;
|
matthiasm@43
|
471 maxindL = iChord;
|
matthiasm@43
|
472 }
|
matthiasm@43
|
473 }
|
matthiasm@43
|
474 // right side:
|
matthiasm@43
|
475 float maxR = 0;
|
matthiasm@43
|
476 unsigned maxindR = nChord-1;
|
matthiasm@43
|
477 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
478 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
479 float currsum = 0;
|
matthiasm@43
|
480 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
481 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
482 }
|
matthiasm@43
|
483 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
484 if (currsum > maxR) {
|
matthiasm@43
|
485 maxR = currsum;
|
matthiasm@43
|
486 maxindR = iChord;
|
matthiasm@43
|
487 }
|
matthiasm@43
|
488 }
|
matthiasm@43
|
489 if (maxL+maxR > maxval) {
|
matthiasm@43
|
490 maxval = maxL+maxR;
|
matthiasm@43
|
491 maxindex = iWF;
|
matthiasm@43
|
492 bestchordL = maxindL;
|
matthiasm@43
|
493 bestchordR = maxindR;
|
matthiasm@43
|
494 }
|
matthiasm@43
|
495
|
Chris@23
|
496 }
|
matthiasm@43
|
497 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
498 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@43
|
499 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
500 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
501 }
|
matthiasm@43
|
502 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
503 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
504 }
|
matthiasm@43
|
505 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@43
|
506 count++;
|
Chris@23
|
507 }
|
matthiasm@43
|
508 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
509 count = 0;
|
matthiasm@43
|
510 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
511 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
512 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
513 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
514 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
515 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
516 maxindex = iChord;
|
matthiasm@43
|
517 // cerr << iChord << endl;
|
matthiasm@43
|
518 }
|
matthiasm@43
|
519 }
|
matthiasm@43
|
520 chordSequence.push_back(maxindex);
|
matthiasm@43
|
521 count++;
|
Chris@23
|
522 }
|
matthiasm@43
|
523
|
matthiasm@43
|
524
|
matthiasm@43
|
525 // mode filter on chordSequence
|
matthiasm@43
|
526 count = 0;
|
matthiasm@43
|
527 string oldChord = "";
|
matthiasm@43
|
528 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
529 Feature chord_feature; // chord estimate
|
matthiasm@43
|
530 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
531 chord_feature.timestamp = *it;
|
matthiasm@43
|
532 // Feature currentChord; // chord estimate
|
matthiasm@43
|
533 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
534 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
535
|
matthiasm@43
|
536 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
537 int maxChordCount = 0;
|
matthiasm@43
|
538 int maxChordIndex = nChord-1;
|
matthiasm@43
|
539 string maxChord;
|
matthiasm@43
|
540 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
541 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
542 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
543 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
544 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
545 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
546 maxChordCount++;
|
matthiasm@43
|
547 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
548 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
549 }
|
matthiasm@43
|
550 }
|
matthiasm@43
|
551 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
552 // cerr << maxChordIndex << endl;
|
matthiasm@43
|
553 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
554 // fsOut[9].push_back(currentChord);
|
matthiasm@43
|
555 if (oldChord != maxChord) {
|
matthiasm@43
|
556 oldChord = maxChord;
|
matthiasm@43
|
557 chord_feature.label = m_chordnames[maxChordIndex];
|
matthiasm@43
|
558 fsOut[0].push_back(chord_feature);
|
matthiasm@43
|
559 }
|
matthiasm@43
|
560 count++;
|
Chris@23
|
561 }
|
Chris@23
|
562 }
|
matthiasm@43
|
563 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
564 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
565 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
566 chord_feature.label = "N";
|
matthiasm@43
|
567 fsOut[0].push_back(chord_feature);
|
Chris@23
|
568 cerr << "done." << endl;
|
Chris@23
|
569 return fsOut;
|
matthiasm@0
|
570 }
|