/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

/*
  NNLS-Chroma / Chordino

  Audio feature extraction plugins for chromagram and chord
  estimation.

  Centre for Digital Music, Queen Mary University of London.
  This file copyright 2008-2010 Matthias Mauch and QMUL.

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version. See the file
  COPYING included with this distribution for more information.
*/

Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
// Switch for the "--> ..." trace messages that the methods in this file
// write to cerr; each of them is guarded by `if (debug_on)`.
const bool debug_on = false;
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
39 }
|
matthiasm@0
|
40
|
Chris@35
|
41 Chordino::~Chordino()
|
matthiasm@0
|
42 {
|
Chris@35
|
43 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
44 }
|
matthiasm@0
|
45
|
matthiasm@0
|
46 string
|
Chris@35
|
47 Chordino::getIdentifier() const
|
matthiasm@0
|
48 {
|
Chris@23
|
49 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
50 return "chordino";
|
matthiasm@0
|
51 }
|
matthiasm@0
|
52
|
matthiasm@0
|
53 string
|
Chris@35
|
54 Chordino::getName() const
|
matthiasm@0
|
55 {
|
Chris@23
|
56 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
57 return "Chordino";
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
Chris@35
|
61 Chordino::getDescription() const
|
matthiasm@0
|
62 {
|
Chris@23
|
63 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@133
|
64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smoothes these to provide a chord transcription using a standard HMM/Viterbi approach.";
|
matthiasm@0
|
65 }
|
matthiasm@0
|
66
|
matthiasm@50
|
67 Chordino::ParameterList
|
matthiasm@50
|
68 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
69 {
|
matthiasm@50
|
70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
71 ParameterList list;
|
matthiasm@50
|
72
|
mail@118
|
73 ParameterDescriptor useNNLSParam;
|
mail@118
|
74 useNNLSParam.identifier = "useNNLS";
|
mail@118
|
75 useNNLSParam.name = "use approximate transcription (NNLS)";
|
mail@118
|
76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
|
mail@118
|
77 useNNLSParam.unit = "";
|
mail@118
|
78 useNNLSParam.minValue = 0.0;
|
mail@118
|
79 useNNLSParam.maxValue = 1.0;
|
mail@118
|
80 useNNLSParam.defaultValue = 1.0;
|
mail@118
|
81 useNNLSParam.isQuantized = true;
|
mail@118
|
82 useNNLSParam.quantizeStep = 1.0;
|
mail@118
|
83 list.push_back(useNNLSParam);
|
matthiasm@50
|
84
|
mail@118
|
85 ParameterDescriptor rollonParam;
|
mail@118
|
86 rollonParam.identifier = "rollon";
|
mail@118
|
87 rollonParam.name = "bass noise threshold";
|
mail@118
|
88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
mail@118
|
89 rollonParam.unit = "%";
|
mail@118
|
90 rollonParam.minValue = 0;
|
mail@118
|
91 rollonParam.maxValue = 5;
|
mail@118
|
92 rollonParam.defaultValue = 0.0;
|
mail@118
|
93 rollonParam.isQuantized = true;
|
mail@118
|
94 rollonParam.quantizeStep = 0.5;
|
mail@118
|
95 list.push_back(rollonParam);
|
matthiasm@50
|
96
|
mail@118
|
97 ParameterDescriptor tuningmodeParam;
|
mail@118
|
98 tuningmodeParam.identifier = "tuningmode";
|
mail@118
|
99 tuningmodeParam.name = "tuning mode";
|
mail@118
|
100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
mail@118
|
101 tuningmodeParam.unit = "";
|
mail@118
|
102 tuningmodeParam.minValue = 0;
|
mail@118
|
103 tuningmodeParam.maxValue = 1;
|
mail@118
|
104 tuningmodeParam.defaultValue = 0.0;
|
mail@118
|
105 tuningmodeParam.isQuantized = true;
|
mail@118
|
106 tuningmodeParam.valueNames.push_back("global tuning");
|
mail@118
|
107 tuningmodeParam.valueNames.push_back("local tuning");
|
mail@118
|
108 tuningmodeParam.quantizeStep = 1.0;
|
mail@118
|
109 list.push_back(tuningmodeParam);
|
matthiasm@50
|
110
|
mail@118
|
111 ParameterDescriptor whiteningParam;
|
mail@118
|
112 whiteningParam.identifier = "whitening";
|
mail@118
|
113 whiteningParam.name = "spectral whitening";
|
mail@118
|
114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@118
|
115 whiteningParam.unit = "";
|
mail@118
|
116 whiteningParam.isQuantized = true;
|
mail@118
|
117 whiteningParam.minValue = 0.0;
|
mail@118
|
118 whiteningParam.maxValue = 1.0;
|
mail@118
|
119 whiteningParam.defaultValue = 1.0;
|
mail@118
|
120 whiteningParam.isQuantized = false;
|
mail@118
|
121 list.push_back(whiteningParam);
|
matthiasm@50
|
122
|
mail@118
|
123 ParameterDescriptor spectralShapeParam;
|
mail@118
|
124 spectralShapeParam.identifier = "spectralshape";
|
mail@118
|
125 spectralShapeParam.name = "spectral shape";
|
mail@118
|
126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@118
|
127 spectralShapeParam.unit = "";
|
mail@118
|
128 spectralShapeParam.minValue = 0.5;
|
mail@118
|
129 spectralShapeParam.maxValue = 0.9;
|
mail@118
|
130 spectralShapeParam.defaultValue = 0.7;
|
mail@118
|
131 spectralShapeParam.isQuantized = false;
|
mail@118
|
132 list.push_back(spectralShapeParam);
|
matthiasm@50
|
133
|
mail@118
|
134 ParameterDescriptor boostnParam;
|
mail@118
|
135 boostnParam.identifier = "boostn";
|
mail@118
|
136 boostnParam.name = "boost N";
|
mail@118
|
137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
|
mail@118
|
138 boostnParam.unit = "";
|
mail@118
|
139 boostnParam.minValue = 0.0;
|
mail@118
|
140 boostnParam.maxValue = 1.0;
|
mail@118
|
141 boostnParam.defaultValue = 0.1;
|
mail@118
|
142 boostnParam.isQuantized = false;
|
mail@118
|
143 list.push_back(boostnParam);
|
matthiasm@50
|
144
|
mail@118
|
145 ParameterDescriptor usehartesyntaxParam;
|
mail@118
|
146 usehartesyntaxParam.identifier = "usehartesyntax";
|
mail@118
|
147 usehartesyntaxParam.name = "use Harte syntax";
|
mail@118
|
148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
|
mail@118
|
149 usehartesyntaxParam.unit = "";
|
mail@118
|
150 usehartesyntaxParam.minValue = 0.0;
|
mail@118
|
151 usehartesyntaxParam.maxValue = 1.0;
|
mail@118
|
152 usehartesyntaxParam.defaultValue = 0.0;
|
mail@118
|
153 usehartesyntaxParam.isQuantized = true;
|
mail@118
|
154 usehartesyntaxParam.quantizeStep = 1.0;
|
mail@118
|
155 usehartesyntaxParam.valueNames.push_back("no");
|
mail@118
|
156 usehartesyntaxParam.valueNames.push_back("yes");
|
mail@118
|
157 list.push_back(usehartesyntaxParam);
|
mail@112
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
matthiasm@0
|
170 OutputDescriptor d7;
|
matthiasm@0
|
171 d7.identifier = "simplechord";
|
Chris@36
|
172 d7.name = "Chord Estimate";
|
matthiasm@133
|
173 d7.description = "Estimated chord times and labels.";
|
matthiasm@0
|
174 d7.unit = "";
|
matthiasm@0
|
175 d7.hasFixedBinCount = true;
|
matthiasm@0
|
176 d7.binCount = 0;
|
matthiasm@0
|
177 d7.hasKnownExtents = false;
|
matthiasm@0
|
178 d7.isQuantized = false;
|
matthiasm@0
|
179 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
180 d7.hasDuration = false;
|
matthiasm@0
|
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
182 list.push_back(d7);
|
Chris@35
|
183 m_outputChords = index++;
|
matthiasm@0
|
184
|
matthiasm@86
|
185 OutputDescriptor chordnotes;
|
matthiasm@86
|
186 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
187 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
189 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
190 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
191 chordnotes.binCount = 1;
|
matthiasm@86
|
192 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
193 chordnotes.minValue = 0;
|
matthiasm@86
|
194 chordnotes.maxValue = 127;
|
matthiasm@86
|
195 chordnotes.isQuantized = true;
|
matthiasm@86
|
196 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
198 chordnotes.hasDuration = true;
|
matthiasm@86
|
199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
200 list.push_back(chordnotes);
|
matthiasm@86
|
201 m_outputChordnotes = index++;
|
matthiasm@86
|
202
|
Chris@23
|
203 OutputDescriptor d8;
|
mail@60
|
204 d8.identifier = "harmonicchange";
|
Chris@36
|
205 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
207 d8.unit = "";
|
matthiasm@17
|
208 d8.hasFixedBinCount = true;
|
matthiasm@17
|
209 d8.binCount = 1;
|
mail@60
|
210 d8.hasKnownExtents = false;
|
matthiasm@17
|
211 d8.isQuantized = false;
|
matthiasm@17
|
212 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
213 d8.hasDuration = false;
|
matthiasm@17
|
214 list.push_back(d8);
|
Chris@35
|
215 m_outputHarmonicChange = index++;
|
matthiasm@1
|
216
|
matthiasm@107
|
217 OutputDescriptor loglikelihood;
|
matthiasm@107
|
218 loglikelihood.identifier = "loglikelihood";
|
mail@126
|
219 loglikelihood.name = "Log-Likelihood of Chord Estimate";
|
mail@124
|
220 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
|
matthiasm@107
|
221 loglikelihood.unit = "";
|
matthiasm@107
|
222 loglikelihood.hasFixedBinCount = true;
|
matthiasm@107
|
223 loglikelihood.binCount = 1;
|
matthiasm@107
|
224 loglikelihood.hasKnownExtents = false;
|
matthiasm@107
|
225 loglikelihood.isQuantized = false;
|
matthiasm@107
|
226 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@107
|
227 loglikelihood.hasDuration = false;
|
matthiasm@107
|
228 list.push_back(loglikelihood);
|
matthiasm@107
|
229 m_outputLoglikelihood = index++;
|
matthiasm@106
|
230
|
matthiasm@0
|
231 return list;
|
matthiasm@0
|
232 }
|
matthiasm@0
|
233
|
matthiasm@0
|
234 bool
|
Chris@35
|
235 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
236 {
|
Chris@23
|
237 if (debug_on) {
|
Chris@23
|
238 cerr << "--> initialise";
|
Chris@23
|
239 }
|
mail@76
|
240
|
Chris@35
|
241 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
242 return false;
|
Chris@35
|
243 }
|
mail@115
|
244 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
|
matthiasm@0
|
245 return true;
|
matthiasm@0
|
246 }
|
matthiasm@0
|
247
|
matthiasm@0
|
248 void
|
Chris@35
|
249 Chordino::reset()
|
matthiasm@0
|
250 {
|
Chris@23
|
251 if (debug_on) cerr << "--> reset";
|
Chris@35
|
252 NNLSBase::reset();
|
matthiasm@0
|
253 }
|
matthiasm@0
|
254
|
Chris@35
|
255 Chordino::FeatureSet
|
Chris@35
|
256 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
257 {
|
Chris@23
|
258 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
259
|
Chris@35
|
260 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
261
|
Chris@35
|
262 return FeatureSet();
|
matthiasm@0
|
263 }
|
matthiasm@0
|
264
|
Chris@35
|
265 Chordino::FeatureSet
|
Chris@35
|
266 Chordino::getRemainingFeatures()
|
matthiasm@0
|
267 {
|
mail@89
|
268 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
269 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
270 FeatureSet fsOut;
|
Chris@35
|
271 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
272 int nChord = m_chordnames.size();
|
Chris@23
|
273 //
|
Chris@23
|
274 /** Calculate Tuning
|
Chris@23
|
275 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
276 cumulative mean real and imag values)
|
Chris@23
|
277 **/
|
mail@80
|
278 float meanTuningImag = 0;
|
mail@80
|
279 float meanTuningReal = 0;
|
mail@80
|
280 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
281 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
282 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
283 }
|
Chris@23
|
284 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
285 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
286 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
287 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
288
|
Chris@23
|
289 char buffer0 [50];
|
matthiasm@1
|
290
|
Chris@23
|
291 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
292
|
matthiasm@1
|
293
|
Chris@23
|
294 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
295 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
296 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
297 **/
|
Chris@35
|
298 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
299
|
Chris@23
|
300 int count = 0;
|
matthiasm@1
|
301
|
Chris@35
|
302 FeatureList tunedSpec;
|
matthiasm@43
|
303 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
304
|
matthiasm@43
|
305 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
306
|
Chris@35
|
307 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
308 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
309 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
310 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
311 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
312 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
313 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
314
|
Chris@23
|
315 if (m_tuneLocal) {
|
Chris@23
|
316 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
317 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
318 }
|
matthiasm@1
|
319
|
mail@80
|
320 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
321
|
Chris@91
|
322 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@115
|
323 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
324 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
325 }
|
matthiasm@1
|
326
|
matthiasm@43
|
327 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
328 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
329 vector<float> runningstd;
|
mail@77
|
330 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
331 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
332 }
|
Chris@23
|
333 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
334 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
335 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
336 if (runningstd[i] > 0) {
|
matthiasm@43
|
337 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
338 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
339 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
340 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
341 }
|
matthiasm@43
|
342 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
343 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
344 }
|
Chris@23
|
345 }
|
matthiasm@43
|
346 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
347 count++;
|
Chris@23
|
348 }
|
Chris@23
|
349 cerr << "done." << endl;
|
matthiasm@1
|
350
|
Chris@23
|
351 /** Semitone spectrum and chromagrams
|
Chris@23
|
352 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
353 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
354 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
355 bass and treble stacked onto each other).
|
Chris@23
|
356 **/
|
matthiasm@42
|
357 if (m_useNNLS == 0) {
|
Chris@35
|
358 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
359 } else {
|
Chris@35
|
360 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
361 }
|
matthiasm@13
|
362
|
matthiasm@1
|
363
|
matthiasm@43
|
364 vector<vector<double> > chordogram;
|
Chris@23
|
365 vector<vector<int> > scoreChordogram;
|
Chris@35
|
366 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
367 count = 0;
|
matthiasm@9
|
368
|
Chris@35
|
369 FeatureList chromaList;
|
matthiasm@43
|
370
|
matthiasm@43
|
371
|
Chris@35
|
372
|
Chris@35
|
373 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
374 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
375 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
376
|
matthiasm@43
|
377 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
378 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
379
|
mail@77
|
380 float b[nNote];
|
matthiasm@1
|
381
|
Chris@23
|
382 bool some_b_greater_zero = false;
|
Chris@23
|
383 float sumb = 0;
|
mail@77
|
384 for (int i = 0; i < nNote; i++) {
|
mail@77
|
385 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
386 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
387 sumb += b[i];
|
Chris@23
|
388 if (b[i] > 0) {
|
Chris@23
|
389 some_b_greater_zero = true;
|
Chris@23
|
390 }
|
Chris@23
|
391 }
|
matthiasm@1
|
392
|
Chris@23
|
393 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
394
|
Chris@23
|
395 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
396 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
397 float currval;
|
Chris@91
|
398 int iSemitone = 0;
|
matthiasm@1
|
399
|
Chris@23
|
400 if (some_b_greater_zero) {
|
matthiasm@42
|
401 if (m_useNNLS == 0) {
|
Chris@91
|
402 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
403 currval = 0;
|
mail@81
|
404 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
405 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
406 }
|
Chris@23
|
407 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
408 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
409 iSemitone++;
|
Chris@23
|
410 }
|
matthiasm@1
|
411
|
Chris@23
|
412 } else {
|
Chris@35
|
413 float x[84+1000];
|
Chris@23
|
414 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
415 vector<int> signifIndex;
|
Chris@23
|
416 int index=0;
|
Chris@23
|
417 sumb /= 84.0;
|
Chris@91
|
418 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
419 float currval = 0;
|
mail@81
|
420 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
421 currval += b[iNote + iBPS];
|
mail@81
|
422 }
|
Chris@23
|
423 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
424 index++;
|
Chris@23
|
425 }
|
Chris@35
|
426 float rnorm;
|
Chris@35
|
427 float w[84+1000];
|
Chris@35
|
428 float zz[84+1000];
|
Chris@23
|
429 int indx[84+1000];
|
Chris@23
|
430 int mode;
|
mail@77
|
431 int dictsize = nNote*signifIndex.size();
|
mail@81
|
432 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
433 float *curr_dict = new float[dictsize];
|
Chris@91
|
434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
435 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
436 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
437 }
|
Chris@23
|
438 }
|
Chris@35
|
439 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
440 delete [] curr_dict;
|
Chris@91
|
441 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
442 // cerr << mode << endl;
|
Chris@23
|
443 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
444 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
445 }
|
Chris@23
|
446 }
|
Chris@23
|
447 }
|
Chris@35
|
448
|
Chris@35
|
449 vector<float> origchroma = chroma;
|
Chris@23
|
450 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
451 currentChromas.values = chroma;
|
Chris@35
|
452
|
Chris@23
|
453 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
454 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
455 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
456 case 0: // should never end up here
|
Chris@23
|
457 break;
|
Chris@23
|
458 case 1:
|
Chris@35
|
459 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
460 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
461 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
462 break;
|
Chris@23
|
463 case 2:
|
Chris@35
|
464 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
465 chromanorm[2] += *it;
|
Chris@23
|
466 }
|
Chris@23
|
467 break;
|
Chris@23
|
468 case 3:
|
Chris@35
|
469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
470 chromanorm[2] += pow(*it,2);
|
Chris@23
|
471 }
|
Chris@23
|
472 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
473 break;
|
Chris@23
|
474 }
|
Chris@23
|
475 if (chromanorm[2] > 0) {
|
Chris@91
|
476 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
477 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
478 }
|
Chris@23
|
479 }
|
Chris@23
|
480 }
|
Chris@35
|
481
|
matthiasm@43
|
482 chromaList.push_back(currentChromas);
|
Chris@35
|
483
|
Chris@23
|
484 // local chord estimation
|
matthiasm@43
|
485 vector<double> currentChordSalience;
|
matthiasm@43
|
486 double tempchordvalue = 0;
|
matthiasm@43
|
487 double sumchordvalue = 0;
|
matthiasm@9
|
488
|
Chris@23
|
489 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
490 tempchordvalue = 0;
|
Chris@23
|
491 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
492 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
493 }
|
Chris@23
|
494 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
495 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
496 }
|
matthiasm@48
|
497 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
498 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
499 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
500 sumchordvalue+=tempchordvalue;
|
Chris@23
|
501 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
502 }
|
Chris@23
|
503 if (sumchordvalue > 0) {
|
Chris@23
|
504 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
505 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
506 }
|
Chris@23
|
507 } else {
|
Chris@23
|
508 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
509 }
|
Chris@23
|
510 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
511
|
Chris@23
|
512 count++;
|
Chris@23
|
513 }
|
Chris@23
|
514 cerr << "done." << endl;
|
matthiasm@13
|
515
|
matthiasm@86
|
516 vector<Feature> oldnotes;
|
matthiasm@10
|
517
|
matthiasm@131
|
518 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@131
|
519 int oldchord = nChord-1;
|
matthiasm@131
|
520 double selftransprob = 0.99;
|
matthiasm@131
|
521
|
matthiasm@131
|
522 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@131
|
523 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@131
|
524
|
matthiasm@131
|
525 double *delta;
|
matthiasm@131
|
526 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@131
|
527
|
matthiasm@131
|
528 vector<vector<double> > trans;
|
matthiasm@131
|
529 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@131
|
530 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@131
|
531 temp[iChord] = selftransprob;
|
matthiasm@131
|
532 trans.push_back(temp);
|
matthiasm@131
|
533 }
|
matthiasm@131
|
534 vector<double> scale;
|
matthiasm@131
|
535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
|
matthiasm@131
|
536
|
matthiasm@48
|
537
|
matthiasm@131
|
538 Feature chord_feature; // chord estimate
|
matthiasm@131
|
539 chord_feature.hasTimestamp = true;
|
matthiasm@131
|
540 chord_feature.timestamp = timestamps[0];
|
matthiasm@131
|
541 chord_feature.label = m_chordnames[chordpath[0]];
|
matthiasm@131
|
542 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@131
|
543
|
matthiasm@131
|
544 chordchange[0] = 0;
|
matthiasm@131
|
545 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@131
|
546 // cerr << chordpath[iFrame] << endl;
|
matthiasm@131
|
547 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@131
|
548 // chord
|
matthiasm@131
|
549 Feature chord_feature; // chord estimate
|
matthiasm@131
|
550 chord_feature.hasTimestamp = true;
|
matthiasm@131
|
551 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@131
|
552 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
matthiasm@131
|
553 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@131
|
554 oldchord = chordpath[iFrame];
|
matthiasm@131
|
555 // chord notes
|
matthiasm@131
|
556 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@131
|
557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@131
|
558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
Chris@23
|
559 }
|
matthiasm@131
|
560 oldnotes.clear();
|
matthiasm@131
|
561 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@131
|
562 Feature chordnote_feature;
|
matthiasm@131
|
563 chordnote_feature.hasTimestamp = true;
|
matthiasm@131
|
564 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@131
|
565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@131
|
566 chordnote_feature.hasDuration = true;
|
matthiasm@131
|
567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@131
|
568 oldnotes.push_back(chordnote_feature);
|
matthiasm@50
|
569 }
|
Chris@23
|
570 }
|
matthiasm@131
|
571 /* calculating simple chord change prob */
|
matthiasm@131
|
572 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@131
|
573 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
Chris@23
|
574 }
|
Chris@23
|
575 }
|
matthiasm@131
|
576
|
matthiasm@131
|
577 float logscale = 0;
|
matthiasm@131
|
578 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@131
|
579 logscale -= log(scale[iFrame]);
|
matthiasm@131
|
580 Feature loglikelihood;
|
matthiasm@131
|
581 loglikelihood.hasTimestamp = true;
|
matthiasm@131
|
582 loglikelihood.timestamp = timestamps[iFrame];
|
matthiasm@131
|
583 loglikelihood.values.push_back(-log(scale[iFrame]));
|
matthiasm@131
|
584 // cerr << chordchange[iFrame] << endl;
|
matthiasm@131
|
585 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
|
matthiasm@131
|
586 }
|
matthiasm@131
|
587 logscale /= nFrame;
|
matthiasm@131
|
588
|
matthiasm@43
|
589 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
590 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
591 chord_feature.label = "N";
|
mail@60
|
592 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
593
|
Chris@91
|
594 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
595 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
596 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
597 }
|
matthiasm@86
|
598
|
Chris@23
|
599 cerr << "done." << endl;
|
matthiasm@50
|
600
|
matthiasm@50
|
601 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
602 Feature chordchange_feature;
|
matthiasm@50
|
603 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
604 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
605 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
606 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
607 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
608 }
|
matthiasm@50
|
609
|
mail@60
|
610 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
611
|
matthiasm@50
|
612
|
Chris@23
|
613 return fsOut;
|
matthiasm@0
|
614 }
|