Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
29 const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@27
|
31 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
32
|
Chris@35
|
33 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
34 Plugin(inputSampleRate),
|
Chris@35
|
35 m_logSpectrum(0),
|
Chris@23
|
36 m_blockSize(0),
|
Chris@23
|
37 m_stepSize(0),
|
Chris@23
|
38 m_lengthOfNoteIndex(0),
|
Chris@23
|
39 m_meanTuning0(0),
|
Chris@23
|
40 m_meanTuning1(0),
|
Chris@23
|
41 m_meanTuning2(0),
|
Chris@23
|
42 m_localTuning0(0),
|
Chris@23
|
43 m_localTuning1(0),
|
Chris@23
|
44 m_localTuning2(0),
|
mail@41
|
45 m_whitening(1.0),
|
Chris@23
|
46 m_preset(0.0),
|
Chris@23
|
47 m_localTuning(0),
|
Chris@23
|
48 m_kernelValue(0),
|
Chris@23
|
49 m_kernelFftIndex(0),
|
Chris@23
|
50 m_kernelNoteIndex(0),
|
Chris@23
|
51 m_dict(0),
|
Chris@23
|
52 m_tuneLocal(false),
|
Chris@23
|
53 m_chorddict(0),
|
Chris@23
|
54 m_chordnames(0),
|
Chris@23
|
55 m_doNormalizeChroma(0),
|
mail@41
|
56 m_rollon(0.0),
|
matthiasm@42
|
57 m_s(0.7),
|
matthiasm@42
|
58 m_useNNLS(1)
|
matthiasm@0
|
59 {
|
Chris@35
|
60 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
61
|
Chris@23
|
62 // make the *note* dictionary matrix
|
Chris@23
|
63 m_dict = new float[nNote * 84];
|
Chris@23
|
64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
65 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
66
|
Chris@23
|
67 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
68 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70
|
matthiasm@0
|
71
|
Chris@35
|
72 NNLSBase::~NNLSBase()
|
matthiasm@0
|
73 {
|
Chris@35
|
74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
75 delete [] m_dict;
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 string
|
Chris@35
|
79 NNLSBase::getMaker() const
|
matthiasm@0
|
80 {
|
Chris@23
|
81 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
82 // Your name here
|
matthiasm@0
|
83 return "Matthias Mauch";
|
matthiasm@0
|
84 }
|
matthiasm@0
|
85
|
matthiasm@0
|
86 int
|
Chris@35
|
87 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
88 {
|
Chris@23
|
89 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
90 // Increment this each time you release a version that behaves
|
matthiasm@0
|
91 // differently from the previous one
|
matthiasm@0
|
92 return 1;
|
matthiasm@0
|
93 }
|
matthiasm@0
|
94
|
matthiasm@0
|
95 string
|
Chris@35
|
96 NNLSBase::getCopyright() const
|
matthiasm@0
|
97 {
|
Chris@23
|
98 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
99 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
100 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
101 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
102 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
103 return "GPL";
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105
|
Chris@35
|
106 NNLSBase::InputDomain
|
Chris@35
|
107 NNLSBase::getInputDomain() const
|
matthiasm@0
|
108 {
|
Chris@23
|
109 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
110 return FrequencyDomain;
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 size_t
|
Chris@35
|
114 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
115 {
|
Chris@23
|
116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
117 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 size_t
|
Chris@35
|
121 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
122 {
|
Chris@23
|
123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
124 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
125 // means the same as the block size for TimeDomain
|
Chris@23
|
126 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
127 }
|
matthiasm@0
|
128
|
matthiasm@0
|
129 size_t
|
Chris@35
|
130 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
131 {
|
Chris@23
|
132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
133 return 1;
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
matthiasm@0
|
136 size_t
|
Chris@35
|
137 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
138 {
|
Chris@23
|
139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
140 return 1;
|
matthiasm@0
|
141 }
|
matthiasm@0
|
142
|
Chris@35
|
143 NNLSBase::ParameterList
|
Chris@35
|
144 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
145 {
|
Chris@23
|
146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
147 ParameterList list;
|
matthiasm@0
|
148
|
matthiasm@42
|
149 ParameterDescriptor d;
|
matthiasm@42
|
150 d.identifier = "useNNLS";
|
matthiasm@42
|
151 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
152 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
153 d.unit = "";
|
matthiasm@42
|
154 d.minValue = 0.0;
|
matthiasm@42
|
155 d.maxValue = 1.0;
|
matthiasm@42
|
156 d.defaultValue = 1.0;
|
matthiasm@42
|
157 d.isQuantized = true;
|
matthiasm@42
|
158 d.quantizeStep = 1.0;
|
matthiasm@42
|
159 list.push_back(d);
|
matthiasm@42
|
160
|
mail@41
|
161 ParameterDescriptor d0;
|
mail@41
|
162 d0.identifier = "rollon";
|
mail@41
|
163 d0.name = "spectral roll-on";
|
mail@41
|
164 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
mail@41
|
165 d0.unit = "";
|
mail@41
|
166 d0.minValue = 0;
|
mail@41
|
167 d0.maxValue = 0.05;
|
mail@41
|
168 d0.defaultValue = 0;
|
matthiasm@48
|
169 d0.isQuantized = true;
|
matthiasm@48
|
170 d0.quantizeStep = 0.005;
|
mail@41
|
171 list.push_back(d0);
|
matthiasm@4
|
172
|
matthiasm@4
|
173 ParameterDescriptor d1;
|
matthiasm@4
|
174 d1.identifier = "tuningmode";
|
matthiasm@4
|
175 d1.name = "tuning mode";
|
matthiasm@4
|
176 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
177 d1.unit = "";
|
matthiasm@4
|
178 d1.minValue = 0;
|
matthiasm@4
|
179 d1.maxValue = 1;
|
matthiasm@4
|
180 d1.defaultValue = 0;
|
matthiasm@4
|
181 d1.isQuantized = true;
|
matthiasm@4
|
182 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
183 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
184 d1.quantizeStep = 1.0;
|
matthiasm@4
|
185 list.push_back(d1);
|
matthiasm@4
|
186
|
mail@41
|
187 ParameterDescriptor d2;
|
mail@41
|
188 d2.identifier = "whitening";
|
mail@41
|
189 d2.name = "spectral whitening";
|
mail@41
|
190 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
191 d2.unit = "";
|
mail@41
|
192 d2.isQuantized = true;
|
mail@41
|
193 d2.minValue = 0.0;
|
mail@41
|
194 d2.maxValue = 1.0;
|
mail@41
|
195 d2.defaultValue = 1.0;
|
mail@41
|
196 d2.isQuantized = false;
|
mail@41
|
197 list.push_back(d2);
|
mail@41
|
198
|
mail@41
|
199 ParameterDescriptor d3;
|
mail@41
|
200 d3.identifier = "s";
|
mail@41
|
201 d3.name = "spectral shape";
|
mail@41
|
202 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
203 d3.unit = "";
|
mail@41
|
204 d3.minValue = 0.5;
|
mail@41
|
205 d3.maxValue = 0.9;
|
mail@41
|
206 d3.defaultValue = 0.7;
|
mail@41
|
207 d3.isQuantized = false;
|
mail@41
|
208 list.push_back(d3);
|
mail@41
|
209
|
Chris@23
|
210 ParameterDescriptor d4;
|
matthiasm@12
|
211 d4.identifier = "chromanormalize";
|
matthiasm@12
|
212 d4.name = "chroma normalization";
|
matthiasm@12
|
213 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
214 d4.unit = "";
|
matthiasm@12
|
215 d4.minValue = 0;
|
matthiasm@13
|
216 d4.maxValue = 3;
|
matthiasm@12
|
217 d4.defaultValue = 0;
|
matthiasm@12
|
218 d4.isQuantized = true;
|
matthiasm@13
|
219 d4.valueNames.push_back("none");
|
matthiasm@13
|
220 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
221 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
222 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
223 d4.quantizeStep = 1.0;
|
matthiasm@12
|
224 list.push_back(d4);
|
matthiasm@4
|
225
|
matthiasm@0
|
226 return list;
|
matthiasm@0
|
227 }
|
matthiasm@0
|
228
|
matthiasm@0
|
229 float
|
Chris@35
|
230 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
231 {
|
Chris@23
|
232 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
233 if (identifier == "useNNLS") {
|
matthiasm@42
|
234 return m_useNNLS;
|
matthiasm@0
|
235 }
|
matthiasm@0
|
236
|
mail@41
|
237 if (identifier == "whitening") {
|
mail@41
|
238 return m_whitening;
|
mail@41
|
239 }
|
mail@41
|
240
|
mail@41
|
241 if (identifier == "s") {
|
mail@41
|
242 return m_s;
|
matthiasm@0
|
243 }
|
matthiasm@17
|
244
|
Chris@23
|
245 if (identifier == "rollon") {
|
matthiasm@17
|
246 return m_rollon;
|
matthiasm@17
|
247 }
|
matthiasm@0
|
248
|
matthiasm@0
|
249 if (identifier == "tuningmode") {
|
matthiasm@0
|
250 if (m_tuneLocal) {
|
matthiasm@0
|
251 return 1.0;
|
matthiasm@0
|
252 } else {
|
matthiasm@0
|
253 return 0.0;
|
matthiasm@0
|
254 }
|
matthiasm@0
|
255 }
|
Chris@23
|
256 if (identifier == "preset") {
|
Chris@23
|
257 return m_preset;
|
matthiasm@3
|
258 }
|
Chris@23
|
259 if (identifier == "chromanormalize") {
|
Chris@23
|
260 return m_doNormalizeChroma;
|
matthiasm@12
|
261 }
|
matthiasm@0
|
262 return 0;
|
matthiasm@0
|
263
|
matthiasm@0
|
264 }
|
matthiasm@0
|
265
|
matthiasm@0
|
266 void
|
Chris@35
|
267 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
268 {
|
Chris@23
|
269 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
270 if (identifier == "useNNLS") {
|
matthiasm@42
|
271 m_useNNLS = (int) value;
|
matthiasm@0
|
272 }
|
matthiasm@0
|
273
|
mail@41
|
274 if (identifier == "whitening") {
|
mail@41
|
275 m_whitening = value;
|
matthiasm@0
|
276 }
|
matthiasm@0
|
277
|
mail@41
|
278 if (identifier == "s") {
|
mail@41
|
279 m_s = value;
|
mail@41
|
280 }
|
mail@41
|
281
|
matthiasm@0
|
282 if (identifier == "tuningmode") {
|
matthiasm@0
|
283 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
284 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
285 }
|
matthiasm@42
|
286 // if (identifier == "preset") {
|
matthiasm@42
|
287 // m_preset = value;
|
matthiasm@42
|
288 // if (m_preset == 0.0) {
|
matthiasm@42
|
289 // m_tuneLocal = false;
|
matthiasm@42
|
290 // m_whitening = 1.0;
|
matthiasm@42
|
291 // m_dictID = 0.0;
|
matthiasm@42
|
292 // }
|
matthiasm@42
|
293 // if (m_preset == 1.0) {
|
matthiasm@42
|
294 // m_tuneLocal = false;
|
matthiasm@42
|
295 // m_whitening = 1.0;
|
matthiasm@42
|
296 // m_dictID = 1.0;
|
matthiasm@42
|
297 // }
|
matthiasm@42
|
298 // if (m_preset == 2.0) {
|
matthiasm@42
|
299 // m_tuneLocal = false;
|
matthiasm@42
|
300 // m_whitening = 0.7;
|
matthiasm@42
|
301 // m_dictID = 0.0;
|
matthiasm@42
|
302 // }
|
matthiasm@42
|
303 // }
|
Chris@23
|
304 if (identifier == "chromanormalize") {
|
Chris@23
|
305 m_doNormalizeChroma = value;
|
Chris@23
|
306 }
|
matthiasm@17
|
307
|
Chris@23
|
308 if (identifier == "rollon") {
|
Chris@23
|
309 m_rollon = value;
|
Chris@23
|
310 }
|
matthiasm@0
|
311 }
|
matthiasm@0
|
312
|
Chris@35
|
313 NNLSBase::ProgramList
|
Chris@35
|
314 NNLSBase::getPrograms() const
|
matthiasm@0
|
315 {
|
Chris@23
|
316 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
317 ProgramList list;
|
matthiasm@0
|
318
|
matthiasm@0
|
319 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
320 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
321
|
matthiasm@0
|
322 return list;
|
matthiasm@0
|
323 }
|
matthiasm@0
|
324
|
matthiasm@0
|
325 string
|
Chris@35
|
326 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
327 {
|
Chris@23
|
328 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
329 return ""; // no programs
|
matthiasm@0
|
330 }
|
matthiasm@0
|
331
|
matthiasm@0
|
332 void
|
Chris@35
|
333 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
334 {
|
Chris@23
|
335 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
336 }
|
matthiasm@0
|
337
|
matthiasm@0
|
338
|
matthiasm@0
|
339 bool
|
Chris@35
|
340 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
341 {
|
Chris@23
|
342 if (debug_on) {
|
Chris@23
|
343 cerr << "--> initialise";
|
Chris@23
|
344 }
|
matthiasm@1
|
345
|
matthiasm@0
|
346 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
347 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
348 m_blockSize = blockSize;
|
matthiasm@0
|
349 m_stepSize = stepSize;
|
Chris@35
|
350 m_frameCount = 0;
|
Chris@23
|
351 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
352 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
353 float *tempkernel;
|
matthiasm@1
|
354
|
Chris@23
|
355 tempkernel = new float[tempn];
|
matthiasm@1
|
356
|
Chris@23
|
357 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
358 m_kernelValue.clear();
|
Chris@23
|
359 m_kernelFftIndex.clear();
|
Chris@23
|
360 m_kernelNoteIndex.clear();
|
Chris@23
|
361 int countNonzero = 0;
|
Chris@23
|
362 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
363 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
364 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
365 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
366 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
367 countNonzero++;
|
Chris@23
|
368 }
|
Chris@23
|
369 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
370 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
371 }
|
Chris@23
|
372 }
|
Chris@23
|
373 }
|
Chris@23
|
374 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
375 delete [] tempkernel;
|
Chris@35
|
376 /*
|
Chris@23
|
377 ofstream myfile;
|
Chris@23
|
378 myfile.open ("matrix.txt");
|
matthiasm@3
|
379 // myfile << "Writing this to a file.\n";
|
Chris@23
|
380 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
381 myfile << m_dict[i] << endl;
|
Chris@23
|
382 }
|
matthiasm@3
|
383 myfile.close();
|
Chris@35
|
384 */
|
matthiasm@0
|
385 return true;
|
matthiasm@0
|
386 }
|
matthiasm@0
|
387
|
matthiasm@0
|
388 void
|
Chris@35
|
389 NNLSBase::reset()
|
matthiasm@0
|
390 {
|
Chris@23
|
391 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
392
|
matthiasm@0
|
393 // Clear buffers, reset stored values, etc
|
Chris@35
|
394 m_frameCount = 0;
|
matthiasm@42
|
395 // m_dictID = 0;
|
Chris@35
|
396 m_logSpectrum.clear();
|
Chris@23
|
397 m_meanTuning0 = 0;
|
Chris@23
|
398 m_meanTuning1 = 0;
|
Chris@23
|
399 m_meanTuning2 = 0;
|
Chris@23
|
400 m_localTuning0 = 0;
|
Chris@23
|
401 m_localTuning1 = 0;
|
Chris@23
|
402 m_localTuning2 = 0;
|
Chris@23
|
403 m_localTuning.clear();
|
matthiasm@0
|
404 }
|
matthiasm@0
|
405
|
Chris@35
|
406 void
|
Chris@35
|
407 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
408 {
|
Chris@35
|
409 m_frameCount++;
|
Chris@23
|
410 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
411
|
Chris@23
|
412 const float *fbuf = inputBuffers[0];
|
Chris@23
|
413 float energysum = 0;
|
Chris@23
|
414 // make magnitude
|
Chris@23
|
415 float maxmag = -10000;
|
Chris@23
|
416 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
417 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
418 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
419 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
420 if (m_rollon > 0) {
|
Chris@23
|
421 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
422 }
|
Chris@23
|
423 }
|
matthiasm@14
|
424
|
Chris@23
|
425 float cumenergy = 0;
|
Chris@23
|
426 if (m_rollon > 0) {
|
Chris@23
|
427 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
428 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
429 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
430 else break;
|
Chris@23
|
431 }
|
Chris@23
|
432 }
|
matthiasm@17
|
433
|
Chris@23
|
434 if (maxmag < 2) {
|
Chris@23
|
435 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
436 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
437 magnitude[iBin] = 0;
|
Chris@23
|
438 }
|
Chris@23
|
439 }
|
matthiasm@4
|
440
|
Chris@23
|
441 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
442 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
443 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
444 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
445 }
|
Chris@23
|
446 int binCount = 0;
|
Chris@23
|
447 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
448 // cerr << ".";
|
Chris@23
|
449 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
450 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
451 binCount++;
|
Chris@23
|
452 }
|
Chris@23
|
453 // cerr << nm[20];
|
Chris@23
|
454 // cerr << endl;
|
matthiasm@0
|
455
|
matthiasm@0
|
456
|
Chris@35
|
457 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
458 // update means of complex tuning variables
|
Chris@35
|
459 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
460 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
461 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
462
|
matthiasm@0
|
463 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
464 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
465 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
466 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
467 float ratioOld = 0.997;
|
matthiasm@3
|
468 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
469 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
470 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
471 }
|
matthiasm@0
|
472
|
matthiasm@0
|
473 // if (m_tuneLocal) {
|
Chris@23
|
474 // local tuning
|
Chris@23
|
475 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
476 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
477 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
478 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
479
|
Chris@23
|
480 Feature f1; // logfreqspec
|
Chris@23
|
481 f1.hasTimestamp = true;
|
matthiasm@0
|
482 f1.timestamp = timestamp;
|
Chris@23
|
483 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
484 f1.values.push_back(nm[iNote]);
|
Chris@23
|
485 }
|
matthiasm@0
|
486
|
matthiasm@0
|
487 // deletes
|
matthiasm@0
|
488 delete[] magnitude;
|
matthiasm@0
|
489 delete[] nm;
|
matthiasm@0
|
490
|
Chris@35
|
491 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
492 }
|
matthiasm@0
|
493
|
Chris@35
|
494
|
Chris@35
|
495 #ifdef NOT_DEFINED
|
Chris@35
|
496
|
Chris@35
|
497 NNLSBase::FeatureSet
|
Chris@35
|
498 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
499 {
|
Chris@23
|
500 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
501 FeatureSet fsOut;
|
Chris@35
|
502 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
503 int nChord = m_chordnames.size();
|
Chris@23
|
504 //
|
Chris@23
|
505 /** Calculate Tuning
|
Chris@23
|
506 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
507 cumulative mean real and imag values)
|
Chris@23
|
508 **/
|
Chris@23
|
509 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
510 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
511 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
512 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
513 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
514 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
515
|
Chris@23
|
516 char buffer0 [50];
|
matthiasm@1
|
517
|
Chris@23
|
518 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
519
|
Chris@23
|
520 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
521
|
Chris@23
|
522 // push tuning to FeatureSet fsOut
|
Chris@23
|
523 Feature f0; // tuning
|
Chris@23
|
524 f0.hasTimestamp = true;
|
Chris@23
|
525 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
526 f0.label = buffer0;
|
Chris@23
|
527 fsOut[0].push_back(f0);
|
matthiasm@1
|
528
|
Chris@23
|
529 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
530 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
531 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
532 **/
|
Chris@23
|
533 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
534
|
Chris@23
|
535 float tempValue = 0;
|
Chris@23
|
536 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
537 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
538 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
539 int count = 0;
|
matthiasm@1
|
540
|
Chris@35
|
541 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
542 Feature f1 = *i;
|
Chris@23
|
543 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
544 f2.hasTimestamp = true;
|
Chris@23
|
545 f2.timestamp = f1.timestamp;
|
Chris@23
|
546 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
547
|
Chris@23
|
548 if (m_tuneLocal) {
|
Chris@23
|
549 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
550 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
551 }
|
matthiasm@1
|
552
|
Chris@23
|
553 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
554
|
Chris@23
|
555 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
556 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
557 f2.values.push_back(tempValue);
|
Chris@23
|
558 }
|
matthiasm@1
|
559
|
Chris@23
|
560 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
561 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
562 vector<float> runningstd;
|
Chris@23
|
563 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
564 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
565 }
|
Chris@23
|
566 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
567 for (int i = 0; i < 256; i++) {
|
Chris@23
|
568 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
569 if (runningstd[i] > 0) {
|
Chris@23
|
570 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
571 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
572 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
573 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
574 }
|
Chris@23
|
575 if (f2.values[i] < 0) {
|
Chris@23
|
576 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
577 }
|
Chris@23
|
578 }
|
Chris@23
|
579 fsOut[2].push_back(f2);
|
Chris@23
|
580 count++;
|
Chris@23
|
581 }
|
Chris@23
|
582 cerr << "done." << endl;
|
matthiasm@1
|
583
|
Chris@23
|
584 /** Semitone spectrum and chromagrams
|
Chris@23
|
585 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
586 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
587 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
588 bass and treble stacked onto each other).
|
Chris@23
|
589 **/
|
matthiasm@42
|
590 if (m_useNNLS == 0) {
|
Chris@23
|
591 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
592 } else {
|
Chris@23
|
593 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
594 }
|
matthiasm@13
|
595
|
matthiasm@1
|
596
|
Chris@23
|
597 vector<vector<float> > chordogram;
|
Chris@23
|
598 vector<vector<int> > scoreChordogram;
|
Chris@23
|
599 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
600 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
601 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
602 count = 0;
|
matthiasm@9
|
603
|
Chris@23
|
604 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
605 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
606 Feature f3; // semitone spectrum
|
Chris@23
|
607 Feature f4; // treble chromagram
|
Chris@23
|
608 Feature f5; // bass chromagram
|
Chris@23
|
609 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
610
|
Chris@23
|
611 f3.hasTimestamp = true;
|
Chris@23
|
612 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
613
|
Chris@23
|
614 f4.hasTimestamp = true;
|
Chris@23
|
615 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
616
|
Chris@23
|
617 f5.hasTimestamp = true;
|
Chris@23
|
618 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
619
|
Chris@23
|
620 f6.hasTimestamp = true;
|
Chris@23
|
621 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
622
|
Chris@29
|
623 float b[256];
|
matthiasm@1
|
624
|
Chris@23
|
625 bool some_b_greater_zero = false;
|
Chris@23
|
626 float sumb = 0;
|
Chris@23
|
627 for (int i = 0; i < 256; i++) {
|
Chris@23
|
628 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
629 b[i] = f2.values[i];
|
Chris@23
|
630 sumb += b[i];
|
Chris@23
|
631 if (b[i] > 0) {
|
Chris@23
|
632 some_b_greater_zero = true;
|
Chris@23
|
633 }
|
Chris@23
|
634 }
|
matthiasm@1
|
635
|
Chris@23
|
636 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
637
|
Chris@23
|
638 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
639 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
640 float currval;
|
Chris@23
|
641 unsigned iSemitone = 0;
|
matthiasm@1
|
642
|
Chris@23
|
643 if (some_b_greater_zero) {
|
matthiasm@42
|
644 if (m_useNNLS == 0) {
|
Chris@23
|
645 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
646 currval = 0;
|
Chris@23
|
647 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
648 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
649 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
650 f3.values.push_back(currval);
|
Chris@23
|
651 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
652 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
653 iSemitone++;
|
Chris@23
|
654 }
|
matthiasm@1
|
655
|
Chris@23
|
656 } else {
|
Chris@29
|
657 float x[84+1000];
|
Chris@23
|
658 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
659 vector<int> signifIndex;
|
Chris@23
|
660 int index=0;
|
Chris@23
|
661 sumb /= 84.0;
|
Chris@23
|
662 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
663 float currval = 0;
|
Chris@23
|
664 currval += b[iNote + 1 + -1];
|
Chris@23
|
665 currval += b[iNote + 1 + 0];
|
Chris@23
|
666 currval += b[iNote + 1 + 1];
|
Chris@23
|
667 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
668 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
669 index++;
|
Chris@23
|
670 }
|
Chris@29
|
671 float rnorm;
|
Chris@29
|
672 float w[84+1000];
|
Chris@29
|
673 float zz[84+1000];
|
Chris@23
|
674 int indx[84+1000];
|
Chris@23
|
675 int mode;
|
Chris@23
|
676 int dictsize = 256*signifIndex.size();
|
Chris@23
|
677 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
678 float *curr_dict = new float[dictsize];
|
Chris@23
|
679 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
680 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
681 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
682 }
|
Chris@23
|
683 }
|
Chris@29
|
684 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
685 delete [] curr_dict;
|
Chris@23
|
686 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
687 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
688 // cerr << mode << endl;
|
Chris@23
|
689 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
690 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
691 }
|
Chris@23
|
692 }
|
Chris@23
|
693 }
|
matthiasm@13
|
694
|
matthiasm@10
|
695
|
matthiasm@12
|
696
|
matthiasm@13
|
697
|
Chris@23
|
698 f4.values = chroma;
|
Chris@23
|
699 f5.values = basschroma;
|
Chris@23
|
700 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
701 f6.values = chroma;
|
matthiasm@1
|
702
|
Chris@23
|
703 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
704 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
705 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
706 case 0: // should never end up here
|
Chris@23
|
707 break;
|
Chris@23
|
708 case 1:
|
Chris@23
|
709 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
710 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
711 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
712 break;
|
Chris@23
|
713 case 2:
|
Chris@23
|
714 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
715 chromanorm[0] += *it;
|
Chris@23
|
716 }
|
Chris@23
|
717 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
718 chromanorm[1] += *it;
|
Chris@23
|
719 }
|
Chris@23
|
720 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
721 chromanorm[2] += *it;
|
Chris@23
|
722 }
|
Chris@23
|
723 break;
|
Chris@23
|
724 case 3:
|
Chris@23
|
725 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
726 chromanorm[0] += pow(*it,2);
|
Chris@23
|
727 }
|
Chris@23
|
728 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
729 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
730 chromanorm[1] += pow(*it,2);
|
Chris@23
|
731 }
|
Chris@23
|
732 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
733 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
734 chromanorm[2] += pow(*it,2);
|
Chris@23
|
735 }
|
Chris@23
|
736 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
737 break;
|
Chris@23
|
738 }
|
Chris@23
|
739 if (chromanorm[0] > 0) {
|
Chris@23
|
740 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
741 f4.values[i] /= chromanorm[0];
|
Chris@23
|
742 }
|
Chris@23
|
743 }
|
Chris@23
|
744 if (chromanorm[1] > 0) {
|
Chris@23
|
745 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
746 f5.values[i] /= chromanorm[1];
|
Chris@23
|
747 }
|
Chris@23
|
748 }
|
Chris@23
|
749 if (chromanorm[2] > 0) {
|
Chris@23
|
750 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
751 f6.values[i] /= chromanorm[2];
|
Chris@23
|
752 }
|
Chris@23
|
753 }
|
matthiasm@13
|
754
|
Chris@23
|
755 }
|
matthiasm@13
|
756
|
Chris@23
|
757 // local chord estimation
|
Chris@23
|
758 vector<float> currentChordSalience;
|
Chris@23
|
759 float tempchordvalue = 0;
|
Chris@23
|
760 float sumchordvalue = 0;
|
matthiasm@9
|
761
|
Chris@23
|
762 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
763 tempchordvalue = 0;
|
Chris@23
|
764 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
765 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
766 }
|
Chris@23
|
767 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
768 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
769 }
|
Chris@23
|
770 sumchordvalue+=tempchordvalue;
|
Chris@23
|
771 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
772 }
|
Chris@23
|
773 if (sumchordvalue > 0) {
|
Chris@23
|
774 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
775 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
776 }
|
Chris@23
|
777 } else {
|
Chris@23
|
778 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
779 }
|
Chris@23
|
780 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
781
|
Chris@23
|
782 fsOut[3].push_back(f3);
|
Chris@23
|
783 fsOut[4].push_back(f4);
|
Chris@23
|
784 fsOut[5].push_back(f5);
|
Chris@23
|
785 fsOut[6].push_back(f6);
|
Chris@23
|
786 count++;
|
Chris@23
|
787 }
|
Chris@23
|
788 cerr << "done." << endl;
|
matthiasm@13
|
789
|
matthiasm@10
|
790
|
Chris@23
|
791 /* Simple chord estimation
|
Chris@23
|
792 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
793 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
794 */
|
Chris@23
|
795 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
796 count = 0;
|
Chris@23
|
797 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
798 vector<int> chordSequence;
|
Chris@23
|
799 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
800 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
801 scoreChordogram.push_back(temp);
|
Chris@23
|
802 }
|
Chris@23
|
803 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
804 int startIndex = count + 1;
|
Chris@23
|
805 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
806
|
Chris@23
|
807 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
808
|
Chris@23
|
809 vector<int> chordCandidates;
|
Chris@23
|
810 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
811 // float currsum = 0;
|
Chris@23
|
812 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
813 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
814 // }
|
Chris@23
|
815 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
816 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
817 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
818 chordCandidates.push_back(iChord);
|
Chris@23
|
819 break;
|
Chris@23
|
820 }
|
Chris@23
|
821 }
|
Chris@23
|
822 }
|
Chris@23
|
823 chordCandidates.push_back(nChord-1);
|
Chris@23
|
824 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
825
|
Chris@23
|
826 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
827 float maxindex = 0; //... and the index thereof
|
Chris@23
|
828 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
829 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
830
|
Chris@23
|
831 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
832 // now find the max values on both sides of iWF
|
Chris@23
|
833 // left side:
|
Chris@23
|
834 float maxL = 0;
|
Chris@23
|
835 unsigned maxindL = nChord-1;
|
Chris@23
|
836 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
837 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
838 float currsum = 0;
|
Chris@23
|
839 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
840 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
841 }
|
Chris@23
|
842 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
843 if (currsum > maxL) {
|
Chris@23
|
844 maxL = currsum;
|
Chris@23
|
845 maxindL = iChord;
|
Chris@23
|
846 }
|
Chris@23
|
847 }
|
Chris@23
|
848 // right side:
|
Chris@23
|
849 float maxR = 0;
|
Chris@23
|
850 unsigned maxindR = nChord-1;
|
Chris@23
|
851 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
852 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
853 float currsum = 0;
|
Chris@23
|
854 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
855 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
856 }
|
Chris@23
|
857 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
858 if (currsum > maxR) {
|
Chris@23
|
859 maxR = currsum;
|
Chris@23
|
860 maxindR = iChord;
|
Chris@23
|
861 }
|
Chris@23
|
862 }
|
Chris@23
|
863 if (maxL+maxR > maxval) {
|
Chris@23
|
864 maxval = maxL+maxR;
|
Chris@23
|
865 maxindex = iWF;
|
Chris@23
|
866 bestchordL = maxindL;
|
Chris@23
|
867 bestchordR = maxindR;
|
Chris@23
|
868 }
|
matthiasm@3
|
869
|
Chris@23
|
870 }
|
Chris@23
|
871 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
872 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
873 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
874 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
875 }
|
Chris@23
|
876 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
877 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
878 }
|
Chris@23
|
879 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
880 count++;
|
Chris@23
|
881 }
|
Chris@23
|
882 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
883 count = 0;
|
Chris@23
|
884 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
885 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
886 float maxindex = 0; //... and the index thereof
|
Chris@23
|
887 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
888 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
889 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
890 maxindex = iChord;
|
Chris@23
|
891 // cerr << iChord << endl;
|
Chris@23
|
892 }
|
Chris@23
|
893 }
|
Chris@23
|
894 chordSequence.push_back(maxindex);
|
Chris@23
|
895 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
896 count++;
|
Chris@23
|
897 }
|
Chris@23
|
898 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
899
|
matthiasm@3
|
900
|
Chris@23
|
901 // mode filter on chordSequence
|
Chris@23
|
902 count = 0;
|
Chris@23
|
903 string oldChord = "";
|
Chris@23
|
904 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
905 Feature f6 = *it;
|
Chris@23
|
906 Feature f7; // chord estimate
|
Chris@23
|
907 f7.hasTimestamp = true;
|
Chris@23
|
908 f7.timestamp = f6.timestamp;
|
Chris@23
|
909 Feature f8; // chord estimate
|
Chris@23
|
910 f8.hasTimestamp = true;
|
Chris@23
|
911 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
912
|
Chris@23
|
913 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
914 int maxChordCount = 0;
|
Chris@23
|
915 int maxChordIndex = nChord-1;
|
Chris@23
|
916 string maxChord;
|
Chris@23
|
917 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
918 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
919 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
920 chordCount[chordSequence[i]]++;
|
Chris@23
|
921 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
922 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
923 maxChordCount++;
|
Chris@23
|
924 maxChordIndex = chordSequence[i];
|
Chris@23
|
925 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
926 }
|
Chris@23
|
927 }
|
Chris@23
|
928 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
929 // cerr << maxChordIndex << endl;
|
Chris@23
|
930 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
931 // cerr << chordchange[count] << endl;
|
Chris@23
|
932 fsOut[9].push_back(f8);
|
Chris@23
|
933 if (oldChord != maxChord) {
|
Chris@23
|
934 oldChord = maxChord;
|
matthiasm@3
|
935
|
Chris@23
|
936 // char buffer1 [50];
|
Chris@23
|
937 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
938 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
939 // } else {
|
Chris@23
|
940 // sprintf(buffer1, "N");
|
Chris@23
|
941 // }
|
Chris@23
|
942 // f7.label = buffer1;
|
Chris@23
|
943 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
944 fsOut[7].push_back(f7);
|
Chris@23
|
945 }
|
Chris@23
|
946 count++;
|
Chris@23
|
947 }
|
Chris@23
|
948 Feature f7; // last chord estimate
|
Chris@23
|
949 f7.hasTimestamp = true;
|
Chris@23
|
950 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
951 f7.label = "N";
|
Chris@23
|
952 fsOut[7].push_back(f7);
|
Chris@23
|
953 cerr << "done." << endl;
|
Chris@23
|
954 // // musicity
|
Chris@23
|
955 // count = 0;
|
Chris@23
|
956 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
957 // vector<float> musicityValue;
|
Chris@23
|
958 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
959 // Feature f4 = *it;
|
Chris@23
|
960 //
|
Chris@23
|
961 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
962 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
963 // float chromasum = 0;
|
Chris@23
|
964 // float diffsum = 0;
|
Chris@23
|
965 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
966 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
967 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
968 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
969 // }
|
Chris@23
|
970 // }
|
Chris@23
|
971 // diffsum /= chromasum;
|
Chris@23
|
972 // musicityValue.push_back(diffsum);
|
Chris@23
|
973 // count++;
|
Chris@23
|
974 // }
|
Chris@23
|
975 //
|
Chris@23
|
976 // float musicityThreshold = 0.44;
|
Chris@23
|
977 // if (m_stepSize == 4096) {
|
Chris@23
|
978 // musicityThreshold = 0.74;
|
Chris@23
|
979 // }
|
Chris@23
|
980 // if (m_stepSize == 4410) {
|
Chris@23
|
981 // musicityThreshold = 0.77;
|
Chris@23
|
982 // }
|
Chris@23
|
983 //
|
Chris@23
|
984 // count = 0;
|
Chris@23
|
985 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
986 // Feature f4 = *it;
|
Chris@23
|
987 // Feature f8; // musicity
|
Chris@23
|
988 // Feature f9; // musicity segmenter
|
Chris@23
|
989 //
|
Chris@23
|
990 // f8.hasTimestamp = true;
|
Chris@23
|
991 // f8.timestamp = f4.timestamp;
|
Chris@23
|
992 // f9.hasTimestamp = true;
|
Chris@23
|
993 // f9.timestamp = f4.timestamp;
|
Chris@23
|
994 //
|
Chris@23
|
995 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
996 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
997 // int musicityCount = 0;
|
Chris@23
|
998 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
999 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1000 // }
|
Chris@23
|
1001 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1002 //
|
Chris@23
|
1003 // if (isSpeech) {
|
Chris@23
|
1004 // if (oldlabeltype != 2) {
|
Chris@23
|
1005 // f9.label = "Speech";
|
Chris@23
|
1006 // fsOut[9].push_back(f9);
|
Chris@23
|
1007 // oldlabeltype = 2;
|
Chris@23
|
1008 // }
|
Chris@23
|
1009 // } else {
|
Chris@23
|
1010 // if (oldlabeltype != 1) {
|
Chris@23
|
1011 // f9.label = "Music";
|
Chris@23
|
1012 // fsOut[9].push_back(f9);
|
Chris@23
|
1013 // oldlabeltype = 1;
|
Chris@23
|
1014 // }
|
Chris@23
|
1015 // }
|
Chris@23
|
1016 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1017 // fsOut[8].push_back(f8);
|
Chris@23
|
1018 // count++;
|
Chris@23
|
1019 // }
|
Chris@23
|
1020 return fsOut;
|
matthiasm@0
|
1021
|
matthiasm@0
|
1022 }
|
matthiasm@0
|
1023
|
Chris@35
|
1024 #endif
|