Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
29 const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@27
|
31 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
32
|
Chris@35
|
33 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
34 Plugin(inputSampleRate),
|
Chris@35
|
35 m_logSpectrum(0),
|
Chris@23
|
36 m_blockSize(0),
|
Chris@23
|
37 m_stepSize(0),
|
Chris@23
|
38 m_lengthOfNoteIndex(0),
|
Chris@23
|
39 m_meanTuning0(0),
|
Chris@23
|
40 m_meanTuning1(0),
|
Chris@23
|
41 m_meanTuning2(0),
|
Chris@23
|
42 m_localTuning0(0),
|
Chris@23
|
43 m_localTuning1(0),
|
Chris@23
|
44 m_localTuning2(0),
|
mail@41
|
45 m_whitening(1.0),
|
Chris@23
|
46 m_preset(0.0),
|
Chris@23
|
47 m_localTuning(0),
|
Chris@23
|
48 m_kernelValue(0),
|
Chris@23
|
49 m_kernelFftIndex(0),
|
Chris@23
|
50 m_kernelNoteIndex(0),
|
Chris@23
|
51 m_dict(0),
|
Chris@23
|
52 m_tuneLocal(false),
|
Chris@23
|
53 m_chorddict(0),
|
Chris@23
|
54 m_chordnames(0),
|
Chris@23
|
55 m_doNormalizeChroma(0),
|
mail@41
|
56 m_rollon(0.0),
|
matthiasm@42
|
57 m_s(0.7),
|
matthiasm@42
|
58 m_useNNLS(1)
|
matthiasm@0
|
59 {
|
Chris@35
|
60 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
61
|
Chris@23
|
62 // make the *note* dictionary matrix
|
Chris@23
|
63 m_dict = new float[nNote * 84];
|
Chris@23
|
64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
65 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
66
|
Chris@23
|
67 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
68 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70
|
matthiasm@0
|
71
|
Chris@35
|
72 NNLSBase::~NNLSBase()
|
matthiasm@0
|
73 {
|
Chris@35
|
74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
75 delete [] m_dict;
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 string
|
Chris@35
|
79 NNLSBase::getMaker() const
|
matthiasm@0
|
80 {
|
Chris@23
|
81 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
82 // Your name here
|
matthiasm@0
|
83 return "Matthias Mauch";
|
matthiasm@0
|
84 }
|
matthiasm@0
|
85
|
matthiasm@0
|
86 int
|
Chris@35
|
87 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
88 {
|
Chris@23
|
89 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
90 // Increment this each time you release a version that behaves
|
matthiasm@0
|
91 // differently from the previous one
|
matthiasm@0
|
92 return 1;
|
matthiasm@0
|
93 }
|
matthiasm@0
|
94
|
matthiasm@0
|
95 string
|
Chris@35
|
96 NNLSBase::getCopyright() const
|
matthiasm@0
|
97 {
|
Chris@23
|
98 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
99 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
100 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
101 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
102 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
103 return "GPL";
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105
|
Chris@35
|
106 NNLSBase::InputDomain
|
Chris@35
|
107 NNLSBase::getInputDomain() const
|
matthiasm@0
|
108 {
|
Chris@23
|
109 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
110 return FrequencyDomain;
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 size_t
|
Chris@35
|
114 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
115 {
|
Chris@23
|
116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
117 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 size_t
|
Chris@35
|
121 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
122 {
|
Chris@23
|
123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
124 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
125 // means the same as the block size for TimeDomain
|
Chris@23
|
126 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
127 }
|
matthiasm@0
|
128
|
matthiasm@0
|
129 size_t
|
Chris@35
|
130 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
131 {
|
Chris@23
|
132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
133 return 1;
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
matthiasm@0
|
136 size_t
|
Chris@35
|
137 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
138 {
|
Chris@23
|
139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
140 return 1;
|
matthiasm@0
|
141 }
|
matthiasm@0
|
142
|
Chris@35
|
143 NNLSBase::ParameterList
|
Chris@35
|
144 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
145 {
|
Chris@23
|
146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
147 ParameterList list;
|
matthiasm@0
|
148
|
matthiasm@42
|
149 ParameterDescriptor d;
|
matthiasm@42
|
150 d.identifier = "useNNLS";
|
matthiasm@42
|
151 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
152 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
153 d.unit = "";
|
matthiasm@42
|
154 d.minValue = 0.0;
|
matthiasm@42
|
155 d.maxValue = 1.0;
|
matthiasm@42
|
156 d.defaultValue = 1.0;
|
matthiasm@42
|
157 d.isQuantized = true;
|
matthiasm@42
|
158 d.quantizeStep = 1.0;
|
matthiasm@42
|
159 list.push_back(d);
|
matthiasm@42
|
160
|
mail@41
|
161 ParameterDescriptor d0;
|
mail@41
|
162 d0.identifier = "rollon";
|
mail@41
|
163 d0.name = "spectral roll-on";
|
mail@41
|
164 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
mail@41
|
165 d0.unit = "";
|
mail@41
|
166 d0.minValue = 0;
|
mail@41
|
167 d0.maxValue = 0.05;
|
mail@41
|
168 d0.defaultValue = 0;
|
mail@41
|
169 d0.isQuantized = false;
|
mail@41
|
170 list.push_back(d0);
|
matthiasm@4
|
171
|
matthiasm@4
|
172 ParameterDescriptor d1;
|
matthiasm@4
|
173 d1.identifier = "tuningmode";
|
matthiasm@4
|
174 d1.name = "tuning mode";
|
matthiasm@4
|
175 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
176 d1.unit = "";
|
matthiasm@4
|
177 d1.minValue = 0;
|
matthiasm@4
|
178 d1.maxValue = 1;
|
matthiasm@4
|
179 d1.defaultValue = 0;
|
matthiasm@4
|
180 d1.isQuantized = true;
|
matthiasm@4
|
181 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
182 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
183 d1.quantizeStep = 1.0;
|
matthiasm@4
|
184 list.push_back(d1);
|
matthiasm@4
|
185
|
mail@41
|
186 ParameterDescriptor d2;
|
mail@41
|
187 d2.identifier = "whitening";
|
mail@41
|
188 d2.name = "spectral whitening";
|
mail@41
|
189 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
190 d2.unit = "";
|
mail@41
|
191 d2.isQuantized = true;
|
mail@41
|
192 d2.minValue = 0.0;
|
mail@41
|
193 d2.maxValue = 1.0;
|
mail@41
|
194 d2.defaultValue = 1.0;
|
mail@41
|
195 d2.isQuantized = false;
|
mail@41
|
196 list.push_back(d2);
|
mail@41
|
197
|
mail@41
|
198 ParameterDescriptor d3;
|
mail@41
|
199 d3.identifier = "s";
|
mail@41
|
200 d3.name = "spectral shape";
|
mail@41
|
201 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
202 d3.unit = "";
|
mail@41
|
203 d3.minValue = 0.5;
|
mail@41
|
204 d3.maxValue = 0.9;
|
mail@41
|
205 d3.defaultValue = 0.7;
|
mail@41
|
206 d3.isQuantized = false;
|
mail@41
|
207 list.push_back(d3);
|
mail@41
|
208
|
Chris@23
|
209 ParameterDescriptor d4;
|
matthiasm@12
|
210 d4.identifier = "chromanormalize";
|
matthiasm@12
|
211 d4.name = "chroma normalization";
|
matthiasm@12
|
212 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
213 d4.unit = "";
|
matthiasm@12
|
214 d4.minValue = 0;
|
matthiasm@13
|
215 d4.maxValue = 3;
|
matthiasm@12
|
216 d4.defaultValue = 0;
|
matthiasm@12
|
217 d4.isQuantized = true;
|
matthiasm@13
|
218 d4.valueNames.push_back("none");
|
matthiasm@13
|
219 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
220 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
221 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
222 d4.quantizeStep = 1.0;
|
matthiasm@12
|
223 list.push_back(d4);
|
matthiasm@4
|
224
|
matthiasm@0
|
225 return list;
|
matthiasm@0
|
226 }
|
matthiasm@0
|
227
|
matthiasm@0
|
228 float
|
Chris@35
|
229 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
230 {
|
Chris@23
|
231 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
232 if (identifier == "useNNLS") {
|
matthiasm@42
|
233 return m_useNNLS;
|
matthiasm@0
|
234 }
|
matthiasm@0
|
235
|
mail@41
|
236 if (identifier == "whitening") {
|
mail@41
|
237 return m_whitening;
|
mail@41
|
238 }
|
mail@41
|
239
|
mail@41
|
240 if (identifier == "s") {
|
mail@41
|
241 return m_s;
|
matthiasm@0
|
242 }
|
matthiasm@17
|
243
|
Chris@23
|
244 if (identifier == "rollon") {
|
matthiasm@17
|
245 return m_rollon;
|
matthiasm@17
|
246 }
|
matthiasm@0
|
247
|
matthiasm@0
|
248 if (identifier == "tuningmode") {
|
matthiasm@0
|
249 if (m_tuneLocal) {
|
matthiasm@0
|
250 return 1.0;
|
matthiasm@0
|
251 } else {
|
matthiasm@0
|
252 return 0.0;
|
matthiasm@0
|
253 }
|
matthiasm@0
|
254 }
|
Chris@23
|
255 if (identifier == "preset") {
|
Chris@23
|
256 return m_preset;
|
matthiasm@3
|
257 }
|
Chris@23
|
258 if (identifier == "chromanormalize") {
|
Chris@23
|
259 return m_doNormalizeChroma;
|
matthiasm@12
|
260 }
|
matthiasm@0
|
261 return 0;
|
matthiasm@0
|
262
|
matthiasm@0
|
263 }
|
matthiasm@0
|
264
|
matthiasm@0
|
265 void
|
Chris@35
|
266 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
267 {
|
Chris@23
|
268 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
269 if (identifier == "useNNLS") {
|
matthiasm@42
|
270 m_useNNLS = (int) value;
|
matthiasm@0
|
271 }
|
matthiasm@0
|
272
|
mail@41
|
273 if (identifier == "whitening") {
|
mail@41
|
274 m_whitening = value;
|
matthiasm@0
|
275 }
|
matthiasm@0
|
276
|
mail@41
|
277 if (identifier == "s") {
|
mail@41
|
278 m_s = value;
|
mail@41
|
279 }
|
mail@41
|
280
|
matthiasm@0
|
281 if (identifier == "tuningmode") {
|
matthiasm@0
|
282 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
283 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
284 }
|
matthiasm@42
|
285 // if (identifier == "preset") {
|
matthiasm@42
|
286 // m_preset = value;
|
matthiasm@42
|
287 // if (m_preset == 0.0) {
|
matthiasm@42
|
288 // m_tuneLocal = false;
|
matthiasm@42
|
289 // m_whitening = 1.0;
|
matthiasm@42
|
290 // m_dictID = 0.0;
|
matthiasm@42
|
291 // }
|
matthiasm@42
|
292 // if (m_preset == 1.0) {
|
matthiasm@42
|
293 // m_tuneLocal = false;
|
matthiasm@42
|
294 // m_whitening = 1.0;
|
matthiasm@42
|
295 // m_dictID = 1.0;
|
matthiasm@42
|
296 // }
|
matthiasm@42
|
297 // if (m_preset == 2.0) {
|
matthiasm@42
|
298 // m_tuneLocal = false;
|
matthiasm@42
|
299 // m_whitening = 0.7;
|
matthiasm@42
|
300 // m_dictID = 0.0;
|
matthiasm@42
|
301 // }
|
matthiasm@42
|
302 // }
|
Chris@23
|
303 if (identifier == "chromanormalize") {
|
Chris@23
|
304 m_doNormalizeChroma = value;
|
Chris@23
|
305 }
|
matthiasm@17
|
306
|
Chris@23
|
307 if (identifier == "rollon") {
|
Chris@23
|
308 m_rollon = value;
|
Chris@23
|
309 }
|
matthiasm@0
|
310 }
|
matthiasm@0
|
311
|
Chris@35
|
312 NNLSBase::ProgramList
|
Chris@35
|
313 NNLSBase::getPrograms() const
|
matthiasm@0
|
314 {
|
Chris@23
|
315 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
316 ProgramList list;
|
matthiasm@0
|
317
|
matthiasm@0
|
318 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
319 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
320
|
matthiasm@0
|
321 return list;
|
matthiasm@0
|
322 }
|
matthiasm@0
|
323
|
matthiasm@0
|
324 string
|
Chris@35
|
325 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
326 {
|
Chris@23
|
327 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
328 return ""; // no programs
|
matthiasm@0
|
329 }
|
matthiasm@0
|
330
|
matthiasm@0
|
331 void
|
Chris@35
|
332 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
333 {
|
Chris@23
|
334 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
335 }
|
matthiasm@0
|
336
|
matthiasm@0
|
337
|
matthiasm@0
|
338 bool
|
Chris@35
|
339 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
340 {
|
Chris@23
|
341 if (debug_on) {
|
Chris@23
|
342 cerr << "--> initialise";
|
Chris@23
|
343 }
|
matthiasm@1
|
344
|
matthiasm@0
|
345 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
346 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
347 m_blockSize = blockSize;
|
matthiasm@0
|
348 m_stepSize = stepSize;
|
Chris@35
|
349 m_frameCount = 0;
|
Chris@23
|
350 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
351 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
352 float *tempkernel;
|
matthiasm@1
|
353
|
Chris@23
|
354 tempkernel = new float[tempn];
|
matthiasm@1
|
355
|
Chris@23
|
356 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
357 m_kernelValue.clear();
|
Chris@23
|
358 m_kernelFftIndex.clear();
|
Chris@23
|
359 m_kernelNoteIndex.clear();
|
Chris@23
|
360 int countNonzero = 0;
|
Chris@23
|
361 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
362 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
363 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
364 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
365 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
366 countNonzero++;
|
Chris@23
|
367 }
|
Chris@23
|
368 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
369 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
370 }
|
Chris@23
|
371 }
|
Chris@23
|
372 }
|
Chris@23
|
373 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
374 delete [] tempkernel;
|
Chris@35
|
375 /*
|
Chris@23
|
376 ofstream myfile;
|
Chris@23
|
377 myfile.open ("matrix.txt");
|
matthiasm@3
|
378 // myfile << "Writing this to a file.\n";
|
Chris@23
|
379 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
380 myfile << m_dict[i] << endl;
|
Chris@23
|
381 }
|
matthiasm@3
|
382 myfile.close();
|
Chris@35
|
383 */
|
matthiasm@0
|
384 return true;
|
matthiasm@0
|
385 }
|
matthiasm@0
|
386
|
matthiasm@0
|
387 void
|
Chris@35
|
388 NNLSBase::reset()
|
matthiasm@0
|
389 {
|
Chris@23
|
390 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
391
|
matthiasm@0
|
392 // Clear buffers, reset stored values, etc
|
Chris@35
|
393 m_frameCount = 0;
|
matthiasm@42
|
394 // m_dictID = 0;
|
Chris@35
|
395 m_logSpectrum.clear();
|
Chris@23
|
396 m_meanTuning0 = 0;
|
Chris@23
|
397 m_meanTuning1 = 0;
|
Chris@23
|
398 m_meanTuning2 = 0;
|
Chris@23
|
399 m_localTuning0 = 0;
|
Chris@23
|
400 m_localTuning1 = 0;
|
Chris@23
|
401 m_localTuning2 = 0;
|
Chris@23
|
402 m_localTuning.clear();
|
matthiasm@0
|
403 }
|
matthiasm@0
|
404
|
Chris@35
|
405 void
|
Chris@35
|
406 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
407 {
|
Chris@35
|
408 m_frameCount++;
|
Chris@23
|
409 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
410
|
Chris@23
|
411 const float *fbuf = inputBuffers[0];
|
Chris@23
|
412 float energysum = 0;
|
Chris@23
|
413 // make magnitude
|
Chris@23
|
414 float maxmag = -10000;
|
Chris@23
|
415 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
416 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
417 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
418 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
419 if (m_rollon > 0) {
|
Chris@23
|
420 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
421 }
|
Chris@23
|
422 }
|
matthiasm@14
|
423
|
Chris@23
|
424 float cumenergy = 0;
|
Chris@23
|
425 if (m_rollon > 0) {
|
Chris@23
|
426 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
427 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
428 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
429 else break;
|
Chris@23
|
430 }
|
Chris@23
|
431 }
|
matthiasm@17
|
432
|
Chris@23
|
433 if (maxmag < 2) {
|
Chris@23
|
434 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
435 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
436 magnitude[iBin] = 0;
|
Chris@23
|
437 }
|
Chris@23
|
438 }
|
matthiasm@4
|
439
|
Chris@23
|
440 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
441 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
442 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
443 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
444 }
|
Chris@23
|
445 int binCount = 0;
|
Chris@23
|
446 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
447 // cerr << ".";
|
Chris@23
|
448 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
449 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
450 binCount++;
|
Chris@23
|
451 }
|
Chris@23
|
452 // cerr << nm[20];
|
Chris@23
|
453 // cerr << endl;
|
matthiasm@0
|
454
|
matthiasm@0
|
455
|
Chris@35
|
456 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
457 // update means of complex tuning variables
|
Chris@35
|
458 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
459 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
460 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
461
|
matthiasm@0
|
462 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
463 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
464 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
465 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
466 float ratioOld = 0.997;
|
matthiasm@3
|
467 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
468 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
469 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
470 }
|
matthiasm@0
|
471
|
matthiasm@0
|
472 // if (m_tuneLocal) {
|
Chris@23
|
473 // local tuning
|
Chris@23
|
474 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
475 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
476 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
477 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
478
|
Chris@23
|
479 Feature f1; // logfreqspec
|
Chris@23
|
480 f1.hasTimestamp = true;
|
matthiasm@0
|
481 f1.timestamp = timestamp;
|
Chris@23
|
482 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
483 f1.values.push_back(nm[iNote]);
|
Chris@23
|
484 }
|
matthiasm@0
|
485
|
matthiasm@0
|
486 // deletes
|
matthiasm@0
|
487 delete[] magnitude;
|
matthiasm@0
|
488 delete[] nm;
|
matthiasm@0
|
489
|
Chris@35
|
490 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
491 }
|
matthiasm@0
|
492
|
Chris@35
|
493
|
Chris@35
|
494 #ifdef NOT_DEFINED
|
Chris@35
|
495
|
Chris@35
|
496 NNLSBase::FeatureSet
|
Chris@35
|
497 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
498 {
|
Chris@23
|
499 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
500 FeatureSet fsOut;
|
Chris@35
|
501 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
502 int nChord = m_chordnames.size();
|
Chris@23
|
503 //
|
Chris@23
|
504 /** Calculate Tuning
|
Chris@23
|
505 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
506 cumulative mean real and imag values)
|
Chris@23
|
507 **/
|
Chris@23
|
508 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
509 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
510 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
511 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
512 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
513 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
514
|
Chris@23
|
515 char buffer0 [50];
|
matthiasm@1
|
516
|
Chris@23
|
517 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
518
|
Chris@23
|
519 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
520
|
Chris@23
|
521 // push tuning to FeatureSet fsOut
|
Chris@23
|
522 Feature f0; // tuning
|
Chris@23
|
523 f0.hasTimestamp = true;
|
Chris@23
|
524 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
525 f0.label = buffer0;
|
Chris@23
|
526 fsOut[0].push_back(f0);
|
matthiasm@1
|
527
|
Chris@23
|
528 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
529 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
530 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
531 **/
|
Chris@23
|
532 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
533
|
Chris@23
|
534 float tempValue = 0;
|
Chris@23
|
535 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
536 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
537 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
538 int count = 0;
|
matthiasm@1
|
539
|
Chris@35
|
540 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
541 Feature f1 = *i;
|
Chris@23
|
542 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
543 f2.hasTimestamp = true;
|
Chris@23
|
544 f2.timestamp = f1.timestamp;
|
Chris@23
|
545 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
546
|
Chris@23
|
547 if (m_tuneLocal) {
|
Chris@23
|
548 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
549 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
550 }
|
matthiasm@1
|
551
|
Chris@23
|
552 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
553
|
Chris@23
|
554 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
555 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
556 f2.values.push_back(tempValue);
|
Chris@23
|
557 }
|
matthiasm@1
|
558
|
Chris@23
|
559 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
560 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
561 vector<float> runningstd;
|
Chris@23
|
562 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
563 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
564 }
|
Chris@23
|
565 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
566 for (int i = 0; i < 256; i++) {
|
Chris@23
|
567 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
568 if (runningstd[i] > 0) {
|
Chris@23
|
569 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
570 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
571 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
572 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
573 }
|
Chris@23
|
574 if (f2.values[i] < 0) {
|
Chris@23
|
575 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
576 }
|
Chris@23
|
577 }
|
Chris@23
|
578 fsOut[2].push_back(f2);
|
Chris@23
|
579 count++;
|
Chris@23
|
580 }
|
Chris@23
|
581 cerr << "done." << endl;
|
matthiasm@1
|
582
|
Chris@23
|
583 /** Semitone spectrum and chromagrams
|
Chris@23
|
584 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
585 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
586 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
587 bass and treble stacked onto each other).
|
Chris@23
|
588 **/
|
matthiasm@42
|
589 if (m_useNNLS == 0) {
|
Chris@23
|
590 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
591 } else {
|
Chris@23
|
592 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
593 }
|
matthiasm@13
|
594
|
matthiasm@1
|
595
|
Chris@23
|
596 vector<vector<float> > chordogram;
|
Chris@23
|
597 vector<vector<int> > scoreChordogram;
|
Chris@23
|
598 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
599 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
600 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
601 count = 0;
|
matthiasm@9
|
602
|
Chris@23
|
603 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
604 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
605 Feature f3; // semitone spectrum
|
Chris@23
|
606 Feature f4; // treble chromagram
|
Chris@23
|
607 Feature f5; // bass chromagram
|
Chris@23
|
608 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
609
|
Chris@23
|
610 f3.hasTimestamp = true;
|
Chris@23
|
611 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
612
|
Chris@23
|
613 f4.hasTimestamp = true;
|
Chris@23
|
614 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
615
|
Chris@23
|
616 f5.hasTimestamp = true;
|
Chris@23
|
617 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
618
|
Chris@23
|
619 f6.hasTimestamp = true;
|
Chris@23
|
620 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
621
|
Chris@29
|
622 float b[256];
|
matthiasm@1
|
623
|
Chris@23
|
624 bool some_b_greater_zero = false;
|
Chris@23
|
625 float sumb = 0;
|
Chris@23
|
626 for (int i = 0; i < 256; i++) {
|
Chris@23
|
627 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
628 b[i] = f2.values[i];
|
Chris@23
|
629 sumb += b[i];
|
Chris@23
|
630 if (b[i] > 0) {
|
Chris@23
|
631 some_b_greater_zero = true;
|
Chris@23
|
632 }
|
Chris@23
|
633 }
|
matthiasm@1
|
634
|
Chris@23
|
635 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
636
|
Chris@23
|
637 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
638 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
639 float currval;
|
Chris@23
|
640 unsigned iSemitone = 0;
|
matthiasm@1
|
641
|
Chris@23
|
642 if (some_b_greater_zero) {
|
matthiasm@42
|
643 if (m_useNNLS == 0) {
|
Chris@23
|
644 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
645 currval = 0;
|
Chris@23
|
646 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
647 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
648 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
649 f3.values.push_back(currval);
|
Chris@23
|
650 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
651 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
652 iSemitone++;
|
Chris@23
|
653 }
|
matthiasm@1
|
654
|
Chris@23
|
655 } else {
|
Chris@29
|
656 float x[84+1000];
|
Chris@23
|
657 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
658 vector<int> signifIndex;
|
Chris@23
|
659 int index=0;
|
Chris@23
|
660 sumb /= 84.0;
|
Chris@23
|
661 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
662 float currval = 0;
|
Chris@23
|
663 currval += b[iNote + 1 + -1];
|
Chris@23
|
664 currval += b[iNote + 1 + 0];
|
Chris@23
|
665 currval += b[iNote + 1 + 1];
|
Chris@23
|
666 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
667 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
668 index++;
|
Chris@23
|
669 }
|
Chris@29
|
670 float rnorm;
|
Chris@29
|
671 float w[84+1000];
|
Chris@29
|
672 float zz[84+1000];
|
Chris@23
|
673 int indx[84+1000];
|
Chris@23
|
674 int mode;
|
Chris@23
|
675 int dictsize = 256*signifIndex.size();
|
Chris@23
|
676 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
677 float *curr_dict = new float[dictsize];
|
Chris@23
|
678 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
679 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
680 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
681 }
|
Chris@23
|
682 }
|
Chris@29
|
683 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
684 delete [] curr_dict;
|
Chris@23
|
685 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
686 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
687 // cerr << mode << endl;
|
Chris@23
|
688 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
689 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
690 }
|
Chris@23
|
691 }
|
Chris@23
|
692 }
|
matthiasm@13
|
693
|
matthiasm@10
|
694
|
matthiasm@12
|
695
|
matthiasm@13
|
696
|
Chris@23
|
697 f4.values = chroma;
|
Chris@23
|
698 f5.values = basschroma;
|
Chris@23
|
699 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
700 f6.values = chroma;
|
matthiasm@1
|
701
|
Chris@23
|
702 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
703 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
704 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
705 case 0: // should never end up here
|
Chris@23
|
706 break;
|
Chris@23
|
707 case 1:
|
Chris@23
|
708 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
709 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
710 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
711 break;
|
Chris@23
|
712 case 2:
|
Chris@23
|
713 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
714 chromanorm[0] += *it;
|
Chris@23
|
715 }
|
Chris@23
|
716 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
717 chromanorm[1] += *it;
|
Chris@23
|
718 }
|
Chris@23
|
719 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
720 chromanorm[2] += *it;
|
Chris@23
|
721 }
|
Chris@23
|
722 break;
|
Chris@23
|
723 case 3:
|
Chris@23
|
724 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
725 chromanorm[0] += pow(*it,2);
|
Chris@23
|
726 }
|
Chris@23
|
727 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
728 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
729 chromanorm[1] += pow(*it,2);
|
Chris@23
|
730 }
|
Chris@23
|
731 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
732 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
733 chromanorm[2] += pow(*it,2);
|
Chris@23
|
734 }
|
Chris@23
|
735 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
736 break;
|
Chris@23
|
737 }
|
Chris@23
|
738 if (chromanorm[0] > 0) {
|
Chris@23
|
739 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
740 f4.values[i] /= chromanorm[0];
|
Chris@23
|
741 }
|
Chris@23
|
742 }
|
Chris@23
|
743 if (chromanorm[1] > 0) {
|
Chris@23
|
744 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
745 f5.values[i] /= chromanorm[1];
|
Chris@23
|
746 }
|
Chris@23
|
747 }
|
Chris@23
|
748 if (chromanorm[2] > 0) {
|
Chris@23
|
749 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
750 f6.values[i] /= chromanorm[2];
|
Chris@23
|
751 }
|
Chris@23
|
752 }
|
matthiasm@13
|
753
|
Chris@23
|
754 }
|
matthiasm@13
|
755
|
Chris@23
|
756 // local chord estimation
|
Chris@23
|
757 vector<float> currentChordSalience;
|
Chris@23
|
758 float tempchordvalue = 0;
|
Chris@23
|
759 float sumchordvalue = 0;
|
matthiasm@9
|
760
|
Chris@23
|
761 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
762 tempchordvalue = 0;
|
Chris@23
|
763 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
764 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
765 }
|
Chris@23
|
766 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
767 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
768 }
|
Chris@23
|
769 sumchordvalue+=tempchordvalue;
|
Chris@23
|
770 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
771 }
|
Chris@23
|
772 if (sumchordvalue > 0) {
|
Chris@23
|
773 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
774 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
775 }
|
Chris@23
|
776 } else {
|
Chris@23
|
777 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
778 }
|
Chris@23
|
779 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
780
|
Chris@23
|
781 fsOut[3].push_back(f3);
|
Chris@23
|
782 fsOut[4].push_back(f4);
|
Chris@23
|
783 fsOut[5].push_back(f5);
|
Chris@23
|
784 fsOut[6].push_back(f6);
|
Chris@23
|
785 count++;
|
Chris@23
|
786 }
|
Chris@23
|
787 cerr << "done." << endl;
|
matthiasm@13
|
788
|
matthiasm@10
|
789
|
Chris@23
|
790 /* Simple chord estimation
|
Chris@23
|
791 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
792 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
793 */
|
Chris@23
|
794 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
795 count = 0;
|
Chris@23
|
796 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
797 vector<int> chordSequence;
|
Chris@23
|
798 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
799 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
800 scoreChordogram.push_back(temp);
|
Chris@23
|
801 }
|
Chris@23
|
802 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
803 int startIndex = count + 1;
|
Chris@23
|
804 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
805
|
Chris@23
|
806 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
807
|
Chris@23
|
808 vector<int> chordCandidates;
|
Chris@23
|
809 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
810 // float currsum = 0;
|
Chris@23
|
811 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
812 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
813 // }
|
Chris@23
|
814 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
815 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
816 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
817 chordCandidates.push_back(iChord);
|
Chris@23
|
818 break;
|
Chris@23
|
819 }
|
Chris@23
|
820 }
|
Chris@23
|
821 }
|
Chris@23
|
822 chordCandidates.push_back(nChord-1);
|
Chris@23
|
823 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
824
|
Chris@23
|
825 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
826 float maxindex = 0; //... and the index thereof
|
Chris@23
|
827 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
828 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
829
|
Chris@23
|
830 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
831 // now find the max values on both sides of iWF
|
Chris@23
|
832 // left side:
|
Chris@23
|
833 float maxL = 0;
|
Chris@23
|
834 unsigned maxindL = nChord-1;
|
Chris@23
|
835 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
836 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
837 float currsum = 0;
|
Chris@23
|
838 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
839 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
840 }
|
Chris@23
|
841 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
842 if (currsum > maxL) {
|
Chris@23
|
843 maxL = currsum;
|
Chris@23
|
844 maxindL = iChord;
|
Chris@23
|
845 }
|
Chris@23
|
846 }
|
Chris@23
|
847 // right side:
|
Chris@23
|
848 float maxR = 0;
|
Chris@23
|
849 unsigned maxindR = nChord-1;
|
Chris@23
|
850 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
851 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
852 float currsum = 0;
|
Chris@23
|
853 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
854 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
855 }
|
Chris@23
|
856 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
857 if (currsum > maxR) {
|
Chris@23
|
858 maxR = currsum;
|
Chris@23
|
859 maxindR = iChord;
|
Chris@23
|
860 }
|
Chris@23
|
861 }
|
Chris@23
|
862 if (maxL+maxR > maxval) {
|
Chris@23
|
863 maxval = maxL+maxR;
|
Chris@23
|
864 maxindex = iWF;
|
Chris@23
|
865 bestchordL = maxindL;
|
Chris@23
|
866 bestchordR = maxindR;
|
Chris@23
|
867 }
|
matthiasm@3
|
868
|
Chris@23
|
869 }
|
Chris@23
|
870 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
871 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
872 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
873 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
874 }
|
Chris@23
|
875 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
876 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
877 }
|
Chris@23
|
878 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
879 count++;
|
Chris@23
|
880 }
|
Chris@23
|
881 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
882 count = 0;
|
Chris@23
|
883 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
884 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
885 float maxindex = 0; //... and the index thereof
|
Chris@23
|
886 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
887 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
888 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
889 maxindex = iChord;
|
Chris@23
|
890 // cerr << iChord << endl;
|
Chris@23
|
891 }
|
Chris@23
|
892 }
|
Chris@23
|
893 chordSequence.push_back(maxindex);
|
Chris@23
|
894 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
895 count++;
|
Chris@23
|
896 }
|
Chris@23
|
897 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
898
|
matthiasm@3
|
899
|
Chris@23
|
900 // mode filter on chordSequence
|
Chris@23
|
901 count = 0;
|
Chris@23
|
902 string oldChord = "";
|
Chris@23
|
903 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
904 Feature f6 = *it;
|
Chris@23
|
905 Feature f7; // chord estimate
|
Chris@23
|
906 f7.hasTimestamp = true;
|
Chris@23
|
907 f7.timestamp = f6.timestamp;
|
Chris@23
|
908 Feature f8; // chord estimate
|
Chris@23
|
909 f8.hasTimestamp = true;
|
Chris@23
|
910 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
911
|
Chris@23
|
912 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
913 int maxChordCount = 0;
|
Chris@23
|
914 int maxChordIndex = nChord-1;
|
Chris@23
|
915 string maxChord;
|
Chris@23
|
916 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
917 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
918 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
919 chordCount[chordSequence[i]]++;
|
Chris@23
|
920 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
921 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
922 maxChordCount++;
|
Chris@23
|
923 maxChordIndex = chordSequence[i];
|
Chris@23
|
924 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
925 }
|
Chris@23
|
926 }
|
Chris@23
|
927 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
928 // cerr << maxChordIndex << endl;
|
Chris@23
|
929 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
930 // cerr << chordchange[count] << endl;
|
Chris@23
|
931 fsOut[9].push_back(f8);
|
Chris@23
|
932 if (oldChord != maxChord) {
|
Chris@23
|
933 oldChord = maxChord;
|
matthiasm@3
|
934
|
Chris@23
|
935 // char buffer1 [50];
|
Chris@23
|
936 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
937 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
938 // } else {
|
Chris@23
|
939 // sprintf(buffer1, "N");
|
Chris@23
|
940 // }
|
Chris@23
|
941 // f7.label = buffer1;
|
Chris@23
|
942 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
943 fsOut[7].push_back(f7);
|
Chris@23
|
944 }
|
Chris@23
|
945 count++;
|
Chris@23
|
946 }
|
Chris@23
|
947 Feature f7; // last chord estimate
|
Chris@23
|
948 f7.hasTimestamp = true;
|
Chris@23
|
949 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
950 f7.label = "N";
|
Chris@23
|
951 fsOut[7].push_back(f7);
|
Chris@23
|
952 cerr << "done." << endl;
|
Chris@23
|
953 // // musicity
|
Chris@23
|
954 // count = 0;
|
Chris@23
|
955 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
956 // vector<float> musicityValue;
|
Chris@23
|
957 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
958 // Feature f4 = *it;
|
Chris@23
|
959 //
|
Chris@23
|
960 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
961 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
962 // float chromasum = 0;
|
Chris@23
|
963 // float diffsum = 0;
|
Chris@23
|
964 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
965 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
966 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
967 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
968 // }
|
Chris@23
|
969 // }
|
Chris@23
|
970 // diffsum /= chromasum;
|
Chris@23
|
971 // musicityValue.push_back(diffsum);
|
Chris@23
|
972 // count++;
|
Chris@23
|
973 // }
|
Chris@23
|
974 //
|
Chris@23
|
975 // float musicityThreshold = 0.44;
|
Chris@23
|
976 // if (m_stepSize == 4096) {
|
Chris@23
|
977 // musicityThreshold = 0.74;
|
Chris@23
|
978 // }
|
Chris@23
|
979 // if (m_stepSize == 4410) {
|
Chris@23
|
980 // musicityThreshold = 0.77;
|
Chris@23
|
981 // }
|
Chris@23
|
982 //
|
Chris@23
|
983 // count = 0;
|
Chris@23
|
984 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
985 // Feature f4 = *it;
|
Chris@23
|
986 // Feature f8; // musicity
|
Chris@23
|
987 // Feature f9; // musicity segmenter
|
Chris@23
|
988 //
|
Chris@23
|
989 // f8.hasTimestamp = true;
|
Chris@23
|
990 // f8.timestamp = f4.timestamp;
|
Chris@23
|
991 // f9.hasTimestamp = true;
|
Chris@23
|
992 // f9.timestamp = f4.timestamp;
|
Chris@23
|
993 //
|
Chris@23
|
994 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
995 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
996 // int musicityCount = 0;
|
Chris@23
|
997 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
998 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
999 // }
|
Chris@23
|
1000 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1001 //
|
Chris@23
|
1002 // if (isSpeech) {
|
Chris@23
|
1003 // if (oldlabeltype != 2) {
|
Chris@23
|
1004 // f9.label = "Speech";
|
Chris@23
|
1005 // fsOut[9].push_back(f9);
|
Chris@23
|
1006 // oldlabeltype = 2;
|
Chris@23
|
1007 // }
|
Chris@23
|
1008 // } else {
|
Chris@23
|
1009 // if (oldlabeltype != 1) {
|
Chris@23
|
1010 // f9.label = "Music";
|
Chris@23
|
1011 // fsOut[9].push_back(f9);
|
Chris@23
|
1012 // oldlabeltype = 1;
|
Chris@23
|
1013 // }
|
Chris@23
|
1014 // }
|
Chris@23
|
1015 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1016 // fsOut[8].push_back(f8);
|
Chris@23
|
1017 // count++;
|
Chris@23
|
1018 // }
|
Chris@23
|
1019 return fsOut;
|
matthiasm@0
|
1020
|
matthiasm@0
|
1021 }
|
matthiasm@0
|
1022
|
Chris@35
|
1023 #endif
|