Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// File-local switch: when true, the methods below trace their entry (and the
// main processing stages) on cerr.
const bool debug_on(false);
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
39 }
|
matthiasm@0
|
40
|
Chris@35
|
41 Chordino::~Chordino()
|
matthiasm@0
|
42 {
|
Chris@35
|
43 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
44 }
|
matthiasm@0
|
45
|
matthiasm@0
|
46 string
|
Chris@35
|
47 Chordino::getIdentifier() const
|
matthiasm@0
|
48 {
|
Chris@23
|
49 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
50 return "chordino";
|
matthiasm@0
|
51 }
|
matthiasm@0
|
52
|
matthiasm@0
|
53 string
|
Chris@35
|
54 Chordino::getName() const
|
matthiasm@0
|
55 {
|
Chris@23
|
56 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
57 return "Chordino";
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
Chris@35
|
61 Chordino::getDescription() const
|
matthiasm@0
|
62 {
|
Chris@23
|
63 if (debug_on) cerr << "--> getDescription" << endl;
|
Chris@149
|
64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smooths these to provide a chord transcription using a standard HMM/Viterbi approach.";
|
matthiasm@0
|
65 }
|
matthiasm@0
|
66
|
matthiasm@50
|
67 Chordino::ParameterList
|
matthiasm@50
|
68 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
69 {
|
matthiasm@50
|
70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
71 ParameterList list;
|
matthiasm@50
|
72
|
mail@118
|
73 ParameterDescriptor useNNLSParam;
|
mail@118
|
74 useNNLSParam.identifier = "useNNLS";
|
mail@118
|
75 useNNLSParam.name = "use approximate transcription (NNLS)";
|
mail@118
|
76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
|
mail@118
|
77 useNNLSParam.unit = "";
|
mail@118
|
78 useNNLSParam.minValue = 0.0;
|
mail@118
|
79 useNNLSParam.maxValue = 1.0;
|
mail@118
|
80 useNNLSParam.defaultValue = 1.0;
|
mail@118
|
81 useNNLSParam.isQuantized = true;
|
mail@118
|
82 useNNLSParam.quantizeStep = 1.0;
|
mail@118
|
83 list.push_back(useNNLSParam);
|
matthiasm@50
|
84
|
mail@118
|
85 ParameterDescriptor rollonParam;
|
mail@118
|
86 rollonParam.identifier = "rollon";
|
mail@118
|
87 rollonParam.name = "bass noise threshold";
|
mail@118
|
88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
mail@118
|
89 rollonParam.unit = "%";
|
mail@118
|
90 rollonParam.minValue = 0;
|
mail@118
|
91 rollonParam.maxValue = 5;
|
mail@118
|
92 rollonParam.defaultValue = 0.0;
|
mail@118
|
93 rollonParam.isQuantized = true;
|
mail@118
|
94 rollonParam.quantizeStep = 0.5;
|
mail@118
|
95 list.push_back(rollonParam);
|
matthiasm@50
|
96
|
mail@118
|
97 ParameterDescriptor tuningmodeParam;
|
mail@118
|
98 tuningmodeParam.identifier = "tuningmode";
|
mail@118
|
99 tuningmodeParam.name = "tuning mode";
|
mail@118
|
100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
mail@118
|
101 tuningmodeParam.unit = "";
|
mail@118
|
102 tuningmodeParam.minValue = 0;
|
mail@118
|
103 tuningmodeParam.maxValue = 1;
|
mail@118
|
104 tuningmodeParam.defaultValue = 0.0;
|
mail@118
|
105 tuningmodeParam.isQuantized = true;
|
mail@118
|
106 tuningmodeParam.valueNames.push_back("global tuning");
|
mail@118
|
107 tuningmodeParam.valueNames.push_back("local tuning");
|
mail@118
|
108 tuningmodeParam.quantizeStep = 1.0;
|
mail@118
|
109 list.push_back(tuningmodeParam);
|
matthiasm@50
|
110
|
mail@118
|
111 ParameterDescriptor whiteningParam;
|
mail@118
|
112 whiteningParam.identifier = "whitening";
|
mail@118
|
113 whiteningParam.name = "spectral whitening";
|
mail@118
|
114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@118
|
115 whiteningParam.unit = "";
|
mail@118
|
116 whiteningParam.isQuantized = true;
|
mail@118
|
117 whiteningParam.minValue = 0.0;
|
mail@118
|
118 whiteningParam.maxValue = 1.0;
|
mail@118
|
119 whiteningParam.defaultValue = 1.0;
|
mail@118
|
120 whiteningParam.isQuantized = false;
|
mail@118
|
121 list.push_back(whiteningParam);
|
matthiasm@50
|
122
|
mail@118
|
123 ParameterDescriptor spectralShapeParam;
|
Chris@164
|
124 spectralShapeParam.identifier = "s";
|
mail@118
|
125 spectralShapeParam.name = "spectral shape";
|
mail@118
|
126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@118
|
127 spectralShapeParam.unit = "";
|
mail@118
|
128 spectralShapeParam.minValue = 0.5;
|
mail@118
|
129 spectralShapeParam.maxValue = 0.9;
|
mail@118
|
130 spectralShapeParam.defaultValue = 0.7;
|
mail@118
|
131 spectralShapeParam.isQuantized = false;
|
mail@118
|
132 list.push_back(spectralShapeParam);
|
matthiasm@50
|
133
|
mail@118
|
134 ParameterDescriptor boostnParam;
|
mail@118
|
135 boostnParam.identifier = "boostn";
|
mail@118
|
136 boostnParam.name = "boost N";
|
mail@118
|
137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
|
mail@118
|
138 boostnParam.unit = "";
|
mail@118
|
139 boostnParam.minValue = 0.0;
|
mail@118
|
140 boostnParam.maxValue = 1.0;
|
mail@118
|
141 boostnParam.defaultValue = 0.1;
|
mail@118
|
142 boostnParam.isQuantized = false;
|
mail@118
|
143 list.push_back(boostnParam);
|
matthiasm@50
|
144
|
mail@118
|
145 ParameterDescriptor usehartesyntaxParam;
|
mail@118
|
146 usehartesyntaxParam.identifier = "usehartesyntax";
|
mail@118
|
147 usehartesyntaxParam.name = "use Harte syntax";
|
mail@118
|
148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
|
mail@118
|
149 usehartesyntaxParam.unit = "";
|
mail@118
|
150 usehartesyntaxParam.minValue = 0.0;
|
mail@118
|
151 usehartesyntaxParam.maxValue = 1.0;
|
mail@118
|
152 usehartesyntaxParam.defaultValue = 0.0;
|
mail@118
|
153 usehartesyntaxParam.isQuantized = true;
|
mail@118
|
154 usehartesyntaxParam.quantizeStep = 1.0;
|
mail@118
|
155 usehartesyntaxParam.valueNames.push_back("no");
|
mail@118
|
156 usehartesyntaxParam.valueNames.push_back("yes");
|
mail@118
|
157 list.push_back(usehartesyntaxParam);
|
mail@112
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
Chris@164
|
170 float featureRate =
|
Chris@164
|
171 (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
Chris@164
|
172
|
matthiasm@0
|
173 OutputDescriptor d7;
|
matthiasm@0
|
174 d7.identifier = "simplechord";
|
Chris@36
|
175 d7.name = "Chord Estimate";
|
matthiasm@133
|
176 d7.description = "Estimated chord times and labels.";
|
matthiasm@0
|
177 d7.unit = "";
|
matthiasm@0
|
178 d7.hasFixedBinCount = true;
|
matthiasm@0
|
179 d7.binCount = 0;
|
matthiasm@0
|
180 d7.hasKnownExtents = false;
|
matthiasm@0
|
181 d7.isQuantized = false;
|
matthiasm@0
|
182 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
183 d7.hasDuration = false;
|
Chris@164
|
184 d7.sampleRate = featureRate;
|
matthiasm@0
|
185 list.push_back(d7);
|
Chris@35
|
186 m_outputChords = index++;
|
matthiasm@0
|
187
|
matthiasm@86
|
188 OutputDescriptor chordnotes;
|
matthiasm@86
|
189 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
190 chordnotes.name = "Note Representation of Chord Estimate";
|
Chris@149
|
191 chordnotes.description = "A simple representation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
192 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
193 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
194 chordnotes.binCount = 1;
|
matthiasm@86
|
195 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
196 chordnotes.minValue = 0;
|
matthiasm@86
|
197 chordnotes.maxValue = 127;
|
matthiasm@86
|
198 chordnotes.isQuantized = true;
|
matthiasm@86
|
199 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
200 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
201 chordnotes.hasDuration = true;
|
Chris@164
|
202 chordnotes.sampleRate = featureRate;
|
matthiasm@86
|
203 list.push_back(chordnotes);
|
matthiasm@86
|
204 m_outputChordnotes = index++;
|
matthiasm@86
|
205
|
Chris@23
|
206 OutputDescriptor d8;
|
mail@60
|
207 d8.identifier = "harmonicchange";
|
Chris@36
|
208 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
209 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
210 d8.unit = "";
|
matthiasm@17
|
211 d8.hasFixedBinCount = true;
|
matthiasm@17
|
212 d8.binCount = 1;
|
mail@60
|
213 d8.hasKnownExtents = false;
|
matthiasm@17
|
214 d8.isQuantized = false;
|
matthiasm@17
|
215 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@164
|
216 d8.sampleRate = featureRate;
|
matthiasm@17
|
217 d8.hasDuration = false;
|
matthiasm@17
|
218 list.push_back(d8);
|
Chris@35
|
219 m_outputHarmonicChange = index++;
|
matthiasm@1
|
220
|
matthiasm@107
|
221 OutputDescriptor loglikelihood;
|
matthiasm@107
|
222 loglikelihood.identifier = "loglikelihood";
|
mail@126
|
223 loglikelihood.name = "Log-Likelihood of Chord Estimate";
|
mail@124
|
224 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
|
matthiasm@107
|
225 loglikelihood.unit = "";
|
matthiasm@107
|
226 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
227 loglikelihood.binCount = 1;
|
matthiasm@107
|
228 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
229 loglikelihood.isQuantized = false;
|
matthiasm@107
|
230 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@164
|
231 loglikelihood.sampleRate = featureRate;
|
matthiasm@107
|
232 loglikelihood.hasDuration = false;
|
matthiasm@107
|
233 list.push_back(loglikelihood);
|
matthiasm@107
|
234 m_outputLoglikelihood = index++;
|
matthiasm@106
|
235
|
matthiasm@0
|
236 return list;
|
matthiasm@0
|
237 }
|
matthiasm@0
|
238
|
matthiasm@0
|
239 bool
|
Chris@35
|
240 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
241 {
|
Chris@23
|
242 if (debug_on) {
|
Chris@23
|
243 cerr << "--> initialise";
|
Chris@23
|
244 }
|
mail@76
|
245
|
Chris@35
|
246 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
247 return false;
|
Chris@35
|
248 }
|
mail@115
|
249 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
|
matthiasm@0
|
250 return true;
|
matthiasm@0
|
251 }
|
matthiasm@0
|
252
|
matthiasm@0
|
253 void
|
Chris@35
|
254 Chordino::reset()
|
matthiasm@0
|
255 {
|
Chris@23
|
256 if (debug_on) cerr << "--> reset";
|
Chris@35
|
257 NNLSBase::reset();
|
matthiasm@0
|
258 }
|
matthiasm@0
|
259
|
Chris@35
|
260 Chordino::FeatureSet
|
Chris@35
|
261 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
262 {
|
Chris@23
|
263 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
264
|
Chris@35
|
265 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
266
|
Chris@35
|
267 return FeatureSet();
|
matthiasm@0
|
268 }
|
matthiasm@0
|
269
|
Chris@35
|
270 Chordino::FeatureSet
|
Chris@35
|
271 Chordino::getRemainingFeatures()
|
matthiasm@0
|
272 {
|
mail@89
|
273 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
274 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
275 FeatureSet fsOut;
|
Chris@35
|
276 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
277 int nChord = m_chordnames.size();
|
Chris@23
|
278 //
|
Chris@23
|
279 /** Calculate Tuning
|
Chris@23
|
280 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
281 cumulative mean real and imag values)
|
Chris@23
|
282 **/
|
mail@80
|
283 float meanTuningImag = 0;
|
mail@80
|
284 float meanTuningReal = 0;
|
mail@80
|
285 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
286 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
287 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
288 }
|
Chris@23
|
289 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
290 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
291 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
292 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
293
|
Chris@23
|
294 char buffer0 [50];
|
matthiasm@1
|
295
|
Chris@23
|
296 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
297
|
matthiasm@1
|
298
|
Chris@23
|
299 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
300 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
301 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
302 **/
|
Chris@163
|
303 if (debug_on) cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
304
|
Chris@23
|
305 int count = 0;
|
matthiasm@1
|
306
|
Chris@35
|
307 FeatureList tunedSpec;
|
matthiasm@43
|
308 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
309
|
matthiasm@43
|
310 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
311
|
Chris@35
|
312 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
313 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
314 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
315 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
316 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
317 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
318 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
319
|
Chris@23
|
320 if (m_tuneLocal) {
|
Chris@23
|
321 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
322 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
323 }
|
matthiasm@1
|
324
|
mail@80
|
325 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
326
|
Chris@91
|
327 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@115
|
328 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
329 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
330 }
|
matthiasm@1
|
331
|
matthiasm@43
|
332 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
333 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
334 vector<float> runningstd;
|
mail@77
|
335 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
336 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
337 }
|
Chris@23
|
338 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
339 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
340 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
341 if (runningstd[i] > 0) {
|
matthiasm@43
|
342 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
343 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
344 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
345 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
346 }
|
matthiasm@43
|
347 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
348 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
349 }
|
Chris@23
|
350 }
|
matthiasm@43
|
351 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
352 count++;
|
Chris@23
|
353 }
|
Chris@163
|
354 if (debug_on) cerr << "done." << endl;
|
matthiasm@1
|
355
|
Chris@23
|
356 /** Semitone spectrum and chromagrams
|
Chris@23
|
357 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
358 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
359 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
360 bass and treble stacked onto each other).
|
Chris@23
|
361 **/
|
matthiasm@42
|
362 if (m_useNNLS == 0) {
|
Chris@163
|
363 if (debug_on) cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
364 } else {
|
Chris@163
|
365 if (debug_on) cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
366 }
|
matthiasm@13
|
367
|
matthiasm@1
|
368
|
matthiasm@43
|
369 vector<vector<double> > chordogram;
|
Chris@23
|
370 vector<vector<int> > scoreChordogram;
|
Chris@35
|
371 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
372 count = 0;
|
matthiasm@9
|
373
|
Chris@35
|
374 FeatureList chromaList;
|
matthiasm@43
|
375
|
Chris@164
|
376 bool clipwarned = false;
|
Chris@35
|
377
|
Chris@35
|
378 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
379 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
380 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
381
|
matthiasm@43
|
382 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
383 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
384
|
mail@77
|
385 float b[nNote];
|
matthiasm@1
|
386
|
Chris@23
|
387 bool some_b_greater_zero = false;
|
Chris@23
|
388 float sumb = 0;
|
mail@77
|
389 for (int i = 0; i < nNote; i++) {
|
mail@77
|
390 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
391 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
392 sumb += b[i];
|
Chris@23
|
393 if (b[i] > 0) {
|
Chris@23
|
394 some_b_greater_zero = true;
|
Chris@23
|
395 }
|
Chris@23
|
396 }
|
matthiasm@1
|
397
|
Chris@23
|
398 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
399
|
Chris@23
|
400 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
401 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
402 float currval;
|
Chris@91
|
403 int iSemitone = 0;
|
matthiasm@1
|
404
|
Chris@23
|
405 if (some_b_greater_zero) {
|
matthiasm@42
|
406 if (m_useNNLS == 0) {
|
Chris@91
|
407 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
408 currval = 0;
|
mail@81
|
409 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
410 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
411 }
|
Chris@23
|
412 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
413 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
414 iSemitone++;
|
Chris@23
|
415 }
|
matthiasm@1
|
416
|
Chris@23
|
417 } else {
|
Chris@35
|
418 float x[84+1000];
|
Chris@23
|
419 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
420 vector<int> signifIndex;
|
Chris@23
|
421 int index=0;
|
Chris@23
|
422 sumb /= 84.0;
|
Chris@91
|
423 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
424 float currval = 0;
|
mail@81
|
425 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
426 currval += b[iNote + iBPS];
|
mail@81
|
427 }
|
Chris@23
|
428 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
429 index++;
|
Chris@23
|
430 }
|
Chris@35
|
431 float rnorm;
|
Chris@35
|
432 float w[84+1000];
|
Chris@35
|
433 float zz[84+1000];
|
Chris@23
|
434 int indx[84+1000];
|
Chris@23
|
435 int mode;
|
mail@77
|
436 int dictsize = nNote*signifIndex.size();
|
mail@81
|
437 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
438 float *curr_dict = new float[dictsize];
|
Chris@91
|
439 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
440 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
441 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
442 }
|
Chris@23
|
443 }
|
Chris@35
|
444 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
445 delete [] curr_dict;
|
Chris@91
|
446 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
447 // cerr << mode << endl;
|
Chris@23
|
448 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
449 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
450 }
|
Chris@23
|
451 }
|
Chris@23
|
452 }
|
Chris@35
|
453
|
Chris@35
|
454 vector<float> origchroma = chroma;
|
Chris@23
|
455 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
456 currentChromas.values = chroma;
|
Chris@164
|
457
|
Chris@23
|
458 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
459 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
460 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
461 case 0: // should never end up here
|
Chris@23
|
462 break;
|
Chris@23
|
463 case 1:
|
Chris@35
|
464 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
465 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
466 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
467 break;
|
Chris@23
|
468 case 2:
|
Chris@35
|
469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
470 chromanorm[2] += *it;
|
Chris@23
|
471 }
|
Chris@23
|
472 break;
|
Chris@23
|
473 case 3:
|
Chris@35
|
474 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
475 chromanorm[2] += pow(*it,2);
|
Chris@23
|
476 }
|
Chris@23
|
477 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
478 break;
|
Chris@23
|
479 }
|
Chris@23
|
480 if (chromanorm[2] > 0) {
|
Chris@91
|
481 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
482 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
483 }
|
Chris@23
|
484 }
|
Chris@23
|
485 }
|
Chris@35
|
486
|
matthiasm@43
|
487 chromaList.push_back(currentChromas);
|
Chris@35
|
488
|
Chris@23
|
489 // local chord estimation
|
matthiasm@43
|
490 vector<double> currentChordSalience;
|
matthiasm@43
|
491 double tempchordvalue = 0;
|
matthiasm@43
|
492 double sumchordvalue = 0;
|
matthiasm@9
|
493
|
Chris@23
|
494 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
495 tempchordvalue = 0;
|
Chris@23
|
496 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@164
|
497 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
498 }
|
Chris@23
|
499 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
501 }
|
matthiasm@48
|
502 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
503 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
Chris@168
|
504 if (tempchordvalue > 200.0) {
|
Chris@164
|
505 if (!clipwarned) {
|
Chris@164
|
506 cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl;
|
Chris@164
|
507 clipwarned = true;
|
Chris@164
|
508 }
|
Chris@168
|
509 tempchordvalue = 200.0;
|
Chris@164
|
510 }
|
Chris@164
|
511 tempchordvalue = pow(1.3, tempchordvalue);
|
Chris@164
|
512 sumchordvalue += tempchordvalue;
|
Chris@23
|
513 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
514 }
|
Chris@23
|
515 if (sumchordvalue > 0) {
|
Chris@23
|
516 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
517 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
518 }
|
Chris@23
|
519 } else {
|
Chris@23
|
520 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
521 }
|
Chris@23
|
522 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
523
|
Chris@23
|
524 count++;
|
Chris@23
|
525 }
|
Chris@163
|
526 if (debug_on) cerr << "done." << endl;
|
matthiasm@13
|
527
|
matthiasm@86
|
528 vector<Feature> oldnotes;
|
matthiasm@10
|
529
|
Chris@163
|
530 if (debug_on) cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@131
|
531 int oldchord = nChord-1;
|
matthiasm@131
|
532 double selftransprob = 0.99;
|
matthiasm@131
|
533
|
matthiasm@131
|
534 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@131
|
535 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@131
|
536
|
matthiasm@131
|
537 double *delta;
|
matthiasm@131
|
538 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@131
|
539
|
matthiasm@131
|
540 vector<vector<double> > trans;
|
matthiasm@131
|
541 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@131
|
542 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@131
|
543 temp[iChord] = selftransprob;
|
matthiasm@131
|
544 trans.push_back(temp);
|
matthiasm@131
|
545 }
|
matthiasm@131
|
546 vector<double> scale;
|
matthiasm@131
|
547 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@131
|
548
|
matthiasm@131
|
549 Feature chord_feature; // chord estimate
|
matthiasm@131
|
550 chord_feature.hasTimestamp = true;
|
matthiasm@131
|
551 chord_feature.timestamp = timestamps[0];
|
matthiasm@131
|
552 chord_feature.label = m_chordnames[chordpath[0]];
|
matthiasm@131
|
553 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@131
|
554
|
matthiasm@131
|
555 chordchange[0] = 0;
|
matthiasm@131
|
556 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@131
|
557 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@131
|
558 // chord
|
matthiasm@131
|
559 Feature chord_feature; // chord estimate
|
matthiasm@131
|
560 chord_feature.hasTimestamp = true;
|
matthiasm@131
|
561 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@131
|
562 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
matthiasm@131
|
563 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@131
|
564 oldchord = chordpath[iFrame];
|
matthiasm@131
|
565 // chord notes
|
matthiasm@131
|
566 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@131
|
567 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@131
|
568 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
Chris@23
|
569 }
|
matthiasm@131
|
570 oldnotes.clear();
|
matthiasm@131
|
571 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@131
|
572 Feature chordnote_feature;
|
matthiasm@131
|
573 chordnote_feature.hasTimestamp = true;
|
matthiasm@131
|
574 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@131
|
575 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@131
|
576 chordnote_feature.hasDuration = true;
|
matthiasm@131
|
577 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@131
|
578 oldnotes.push_back(chordnote_feature);
|
matthiasm@50
|
579 }
|
Chris@23
|
580 }
|
matthiasm@131
|
581 /* calculating simple chord change prob */
|
matthiasm@131
|
582 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@164
|
583 double num = delta[(iFrame-1) * nChord + iChord];
|
Chris@164
|
584 double denom = delta[iFrame * nChord + iChord];
|
Chris@164
|
585 double eps = 1e-7;
|
Chris@164
|
586 if (denom < eps) denom = eps;
|
Chris@164
|
587 chordchange[iFrame-1] += num * log(num / denom + eps);
|
Chris@23
|
588 }
|
Chris@23
|
589 }
|
matthiasm@131
|
590
|
matthiasm@131
|
591 float logscale = 0;
|
matthiasm@131
|
592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@131
|
593 logscale -= log(scale[iFrame]);
|
matthiasm@131
|
594 Feature loglikelihood;
|
matthiasm@131
|
595 loglikelihood.hasTimestamp = true;
|
matthiasm@131
|
596 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@131
|
597 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@131
|
598 // cerr << chordchange[iFrame] << endl;
|
matthiasm@131
|
599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@131
|
600 }
|
matthiasm@131
|
601 logscale /= nFrame;
|
matthiasm@131
|
602
|
matthiasm@43
|
603 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
604 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
605 chord_feature.label = "N";
|
mail@60
|
606 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
607
|
Chris@91
|
608 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
609 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
610 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
611 }
|
matthiasm@86
|
612
|
Chris@163
|
613 if (debug_on) cerr << "done." << endl;
|
Chris@159
|
614
|
matthiasm@50
|
615 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
616 Feature chordchange_feature;
|
matthiasm@50
|
617 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
618 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
619 chordchange_feature.values.push_back(chordchange[iFrame]);
|
Chris@164
|
620 // cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl;
|
mail@60
|
621 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
622 }
|
Chris@161
|
623
|
Chris@161
|
624 free(delta);
|
matthiasm@50
|
625
|
mail@60
|
626 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
627
|
Chris@23
|
628 return fsOut;
|
matthiasm@0
|
629 }
|