Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
29 const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@27
|
31 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
32
|
Chris@35
|
33 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
34 Plugin(inputSampleRate),
|
Chris@35
|
35 m_logSpectrum(0),
|
Chris@23
|
36 m_blockSize(0),
|
Chris@23
|
37 m_stepSize(0),
|
Chris@23
|
38 m_lengthOfNoteIndex(0),
|
Chris@23
|
39 m_meanTuning0(0),
|
Chris@23
|
40 m_meanTuning1(0),
|
Chris@23
|
41 m_meanTuning2(0),
|
Chris@23
|
42 m_localTuning0(0),
|
Chris@23
|
43 m_localTuning1(0),
|
Chris@23
|
44 m_localTuning2(0),
|
mail@41
|
45 m_whitening(1.0),
|
Chris@23
|
46 m_preset(0.0),
|
Chris@23
|
47 m_localTuning(0),
|
Chris@23
|
48 m_kernelValue(0),
|
Chris@23
|
49 m_kernelFftIndex(0),
|
Chris@23
|
50 m_kernelNoteIndex(0),
|
Chris@23
|
51 m_dict(0),
|
mail@60
|
52 m_tuneLocal(0),
|
Chris@23
|
53 m_chorddict(0),
|
Chris@23
|
54 m_chordnames(0),
|
Chris@23
|
55 m_doNormalizeChroma(0),
|
mail@60
|
56 m_rollon(0),
|
matthiasm@42
|
57 m_s(0.7),
|
matthiasm@50
|
58 m_useNNLS(1),
|
matthiasm@50
|
59 m_useHMM(1)
|
matthiasm@0
|
60 {
|
Chris@35
|
61 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
62
|
Chris@23
|
63 // make the *note* dictionary matrix
|
Chris@23
|
64 m_dict = new float[nNote * 84];
|
Chris@23
|
65 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
66 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
67
|
Chris@23
|
68 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
69 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
70 }
|
matthiasm@0
|
71
|
matthiasm@0
|
72
|
Chris@35
|
73 NNLSBase::~NNLSBase()
|
matthiasm@0
|
74 {
|
Chris@35
|
75 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
76 delete [] m_dict;
|
matthiasm@0
|
77 }
|
matthiasm@0
|
78
|
matthiasm@0
|
79 string
|
Chris@35
|
80 NNLSBase::getMaker() const
|
matthiasm@0
|
81 {
|
Chris@23
|
82 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
83 // Your name here
|
matthiasm@0
|
84 return "Matthias Mauch";
|
matthiasm@0
|
85 }
|
matthiasm@0
|
86
|
matthiasm@0
|
87 int
|
Chris@35
|
88 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
89 {
|
Chris@23
|
90 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
91 // Increment this each time you release a version that behaves
|
matthiasm@0
|
92 // differently from the previous one
|
matthiasm@0
|
93 return 1;
|
matthiasm@0
|
94 }
|
matthiasm@0
|
95
|
matthiasm@0
|
96 string
|
Chris@35
|
97 NNLSBase::getCopyright() const
|
matthiasm@0
|
98 {
|
Chris@23
|
99 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
100 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
101 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
102 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
103 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
104 return "GPL";
|
matthiasm@0
|
105 }
|
matthiasm@0
|
106
|
Chris@35
|
107 NNLSBase::InputDomain
|
Chris@35
|
108 NNLSBase::getInputDomain() const
|
matthiasm@0
|
109 {
|
Chris@23
|
110 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
111 return FrequencyDomain;
|
matthiasm@0
|
112 }
|
matthiasm@0
|
113
|
matthiasm@0
|
114 size_t
|
Chris@35
|
115 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
116 {
|
Chris@23
|
117 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
118 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
119 }
|
matthiasm@0
|
120
|
matthiasm@0
|
121 size_t
|
Chris@35
|
122 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
123 {
|
Chris@23
|
124 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
125 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
126 // means the same as the block size for TimeDomain
|
Chris@23
|
127 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
128 }
|
matthiasm@0
|
129
|
matthiasm@0
|
130 size_t
|
Chris@35
|
131 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
132 {
|
Chris@23
|
133 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
134 return 1;
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
matthiasm@0
|
137 size_t
|
Chris@35
|
138 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
139 {
|
Chris@23
|
140 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
141 return 1;
|
matthiasm@0
|
142 }
|
matthiasm@0
|
143
|
Chris@35
|
144 NNLSBase::ParameterList
|
Chris@35
|
145 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
146 {
|
Chris@23
|
147 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
148 ParameterList list;
|
matthiasm@0
|
149
|
matthiasm@42
|
150 ParameterDescriptor d;
|
matthiasm@42
|
151 d.identifier = "useNNLS";
|
matthiasm@42
|
152 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
153 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
154 d.unit = "";
|
matthiasm@42
|
155 d.minValue = 0.0;
|
matthiasm@42
|
156 d.maxValue = 1.0;
|
matthiasm@42
|
157 d.defaultValue = 1.0;
|
matthiasm@42
|
158 d.isQuantized = true;
|
matthiasm@42
|
159 d.quantizeStep = 1.0;
|
matthiasm@42
|
160 list.push_back(d);
|
matthiasm@42
|
161
|
mail@41
|
162 ParameterDescriptor d0;
|
mail@41
|
163 d0.identifier = "rollon";
|
mail@41
|
164 d0.name = "spectral roll-on";
|
matthiasm@58
|
165 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
166 d0.unit = "%";
|
mail@41
|
167 d0.minValue = 0;
|
matthiasm@59
|
168 d0.maxValue = 5;
|
mail@41
|
169 d0.defaultValue = 0;
|
matthiasm@48
|
170 d0.isQuantized = true;
|
matthiasm@59
|
171 d0.quantizeStep = 0.5;
|
mail@41
|
172 list.push_back(d0);
|
matthiasm@4
|
173
|
matthiasm@4
|
174 ParameterDescriptor d1;
|
matthiasm@4
|
175 d1.identifier = "tuningmode";
|
matthiasm@4
|
176 d1.name = "tuning mode";
|
matthiasm@4
|
177 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
178 d1.unit = "";
|
matthiasm@4
|
179 d1.minValue = 0;
|
matthiasm@4
|
180 d1.maxValue = 1;
|
matthiasm@4
|
181 d1.defaultValue = 0;
|
matthiasm@4
|
182 d1.isQuantized = true;
|
matthiasm@4
|
183 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
184 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
185 d1.quantizeStep = 1.0;
|
matthiasm@4
|
186 list.push_back(d1);
|
matthiasm@4
|
187
|
mail@41
|
188 ParameterDescriptor d2;
|
mail@41
|
189 d2.identifier = "whitening";
|
mail@41
|
190 d2.name = "spectral whitening";
|
mail@41
|
191 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
192 d2.unit = "";
|
mail@41
|
193 d2.isQuantized = true;
|
mail@41
|
194 d2.minValue = 0.0;
|
mail@41
|
195 d2.maxValue = 1.0;
|
mail@41
|
196 d2.defaultValue = 1.0;
|
mail@41
|
197 d2.isQuantized = false;
|
mail@41
|
198 list.push_back(d2);
|
mail@41
|
199
|
mail@41
|
200 ParameterDescriptor d3;
|
mail@41
|
201 d3.identifier = "s";
|
mail@41
|
202 d3.name = "spectral shape";
|
mail@41
|
203 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
204 d3.unit = "";
|
mail@41
|
205 d3.minValue = 0.5;
|
mail@41
|
206 d3.maxValue = 0.9;
|
mail@41
|
207 d3.defaultValue = 0.7;
|
mail@41
|
208 d3.isQuantized = false;
|
mail@41
|
209 list.push_back(d3);
|
mail@41
|
210
|
Chris@23
|
211 ParameterDescriptor d4;
|
matthiasm@12
|
212 d4.identifier = "chromanormalize";
|
matthiasm@12
|
213 d4.name = "chroma normalization";
|
matthiasm@12
|
214 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
215 d4.unit = "";
|
matthiasm@12
|
216 d4.minValue = 0;
|
matthiasm@13
|
217 d4.maxValue = 3;
|
matthiasm@12
|
218 d4.defaultValue = 0;
|
matthiasm@12
|
219 d4.isQuantized = true;
|
matthiasm@13
|
220 d4.valueNames.push_back("none");
|
matthiasm@13
|
221 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
222 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
223 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
224 d4.quantizeStep = 1.0;
|
matthiasm@12
|
225 list.push_back(d4);
|
matthiasm@4
|
226
|
matthiasm@0
|
227 return list;
|
matthiasm@0
|
228 }
|
matthiasm@0
|
229
|
matthiasm@0
|
230 float
|
Chris@35
|
231 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
232 {
|
Chris@23
|
233 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
234 if (identifier == "useNNLS") {
|
matthiasm@42
|
235 return m_useNNLS;
|
matthiasm@0
|
236 }
|
matthiasm@0
|
237
|
mail@41
|
238 if (identifier == "whitening") {
|
mail@41
|
239 return m_whitening;
|
mail@41
|
240 }
|
mail@41
|
241
|
mail@41
|
242 if (identifier == "s") {
|
mail@41
|
243 return m_s;
|
matthiasm@0
|
244 }
|
matthiasm@17
|
245
|
Chris@23
|
246 if (identifier == "rollon") {
|
matthiasm@17
|
247 return m_rollon;
|
matthiasm@17
|
248 }
|
matthiasm@0
|
249
|
matthiasm@0
|
250 if (identifier == "tuningmode") {
|
matthiasm@0
|
251 if (m_tuneLocal) {
|
matthiasm@0
|
252 return 1.0;
|
matthiasm@0
|
253 } else {
|
matthiasm@0
|
254 return 0.0;
|
matthiasm@0
|
255 }
|
matthiasm@0
|
256 }
|
Chris@23
|
257 if (identifier == "preset") {
|
Chris@23
|
258 return m_preset;
|
matthiasm@3
|
259 }
|
Chris@23
|
260 if (identifier == "chromanormalize") {
|
Chris@23
|
261 return m_doNormalizeChroma;
|
matthiasm@12
|
262 }
|
matthiasm@50
|
263
|
matthiasm@50
|
264 if (identifier == "useHMM") {
|
matthiasm@50
|
265 return m_useHMM;
|
matthiasm@50
|
266 }
|
matthiasm@50
|
267
|
matthiasm@0
|
268 return 0;
|
matthiasm@0
|
269
|
matthiasm@0
|
270 }
|
matthiasm@0
|
271
|
matthiasm@0
|
272 void
|
Chris@35
|
273 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
274 {
|
Chris@23
|
275 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
276 if (identifier == "useNNLS") {
|
matthiasm@42
|
277 m_useNNLS = (int) value;
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
mail@41
|
280 if (identifier == "whitening") {
|
mail@41
|
281 m_whitening = value;
|
matthiasm@0
|
282 }
|
matthiasm@0
|
283
|
mail@41
|
284 if (identifier == "s") {
|
mail@41
|
285 m_s = value;
|
mail@41
|
286 }
|
mail@41
|
287
|
matthiasm@50
|
288 if (identifier == "useHMM") {
|
matthiasm@50
|
289 m_useHMM = value;
|
matthiasm@50
|
290 }
|
matthiasm@50
|
291
|
matthiasm@0
|
292 if (identifier == "tuningmode") {
|
mail@60
|
293 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
294 m_tuneLocal = value;
|
matthiasm@0
|
295 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
296 }
|
matthiasm@42
|
297 // if (identifier == "preset") {
|
matthiasm@42
|
298 // m_preset = value;
|
matthiasm@42
|
299 // if (m_preset == 0.0) {
|
matthiasm@42
|
300 // m_tuneLocal = false;
|
matthiasm@42
|
301 // m_whitening = 1.0;
|
matthiasm@42
|
302 // m_dictID = 0.0;
|
matthiasm@42
|
303 // }
|
matthiasm@42
|
304 // if (m_preset == 1.0) {
|
matthiasm@42
|
305 // m_tuneLocal = false;
|
matthiasm@42
|
306 // m_whitening = 1.0;
|
matthiasm@42
|
307 // m_dictID = 1.0;
|
matthiasm@42
|
308 // }
|
matthiasm@42
|
309 // if (m_preset == 2.0) {
|
matthiasm@42
|
310 // m_tuneLocal = false;
|
matthiasm@42
|
311 // m_whitening = 0.7;
|
matthiasm@42
|
312 // m_dictID = 0.0;
|
matthiasm@42
|
313 // }
|
matthiasm@42
|
314 // }
|
Chris@23
|
315 if (identifier == "chromanormalize") {
|
Chris@23
|
316 m_doNormalizeChroma = value;
|
Chris@23
|
317 }
|
matthiasm@17
|
318
|
Chris@23
|
319 if (identifier == "rollon") {
|
Chris@23
|
320 m_rollon = value;
|
Chris@23
|
321 }
|
matthiasm@0
|
322 }
|
matthiasm@0
|
323
|
Chris@35
|
324 NNLSBase::ProgramList
|
Chris@35
|
325 NNLSBase::getPrograms() const
|
matthiasm@0
|
326 {
|
Chris@23
|
327 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
328 ProgramList list;
|
matthiasm@0
|
329
|
matthiasm@0
|
330 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
331 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
332
|
matthiasm@0
|
333 return list;
|
matthiasm@0
|
334 }
|
matthiasm@0
|
335
|
matthiasm@0
|
336 string
|
Chris@35
|
337 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
338 {
|
Chris@23
|
339 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
340 return ""; // no programs
|
matthiasm@0
|
341 }
|
matthiasm@0
|
342
|
matthiasm@0
|
343 void
|
Chris@35
|
344 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
345 {
|
Chris@23
|
346 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
347 }
|
matthiasm@0
|
348
|
matthiasm@0
|
349
|
matthiasm@0
|
350 bool
|
Chris@35
|
351 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
352 {
|
Chris@23
|
353 if (debug_on) {
|
Chris@23
|
354 cerr << "--> initialise";
|
Chris@23
|
355 }
|
matthiasm@1
|
356
|
matthiasm@0
|
357 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
358 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
359 m_blockSize = blockSize;
|
matthiasm@0
|
360 m_stepSize = stepSize;
|
Chris@35
|
361 m_frameCount = 0;
|
Chris@23
|
362 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
363 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
364 float *tempkernel;
|
matthiasm@1
|
365
|
Chris@23
|
366 tempkernel = new float[tempn];
|
matthiasm@1
|
367
|
Chris@23
|
368 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
369 m_kernelValue.clear();
|
Chris@23
|
370 m_kernelFftIndex.clear();
|
Chris@23
|
371 m_kernelNoteIndex.clear();
|
Chris@23
|
372 int countNonzero = 0;
|
Chris@23
|
373 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
374 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
375 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
376 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
377 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
378 countNonzero++;
|
Chris@23
|
379 }
|
Chris@23
|
380 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
381 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
382 }
|
Chris@23
|
383 }
|
Chris@23
|
384 }
|
Chris@23
|
385 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
386 delete [] tempkernel;
|
Chris@35
|
387 /*
|
Chris@23
|
388 ofstream myfile;
|
Chris@23
|
389 myfile.open ("matrix.txt");
|
matthiasm@3
|
390 // myfile << "Writing this to a file.\n";
|
Chris@23
|
391 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
392 myfile << m_dict[i] << endl;
|
Chris@23
|
393 }
|
matthiasm@3
|
394 myfile.close();
|
Chris@35
|
395 */
|
matthiasm@0
|
396 return true;
|
matthiasm@0
|
397 }
|
matthiasm@0
|
398
|
matthiasm@0
|
399 void
|
Chris@35
|
400 NNLSBase::reset()
|
matthiasm@0
|
401 {
|
Chris@23
|
402 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
403
|
matthiasm@0
|
404 // Clear buffers, reset stored values, etc
|
Chris@35
|
405 m_frameCount = 0;
|
matthiasm@42
|
406 // m_dictID = 0;
|
Chris@35
|
407 m_logSpectrum.clear();
|
Chris@23
|
408 m_meanTuning0 = 0;
|
Chris@23
|
409 m_meanTuning1 = 0;
|
Chris@23
|
410 m_meanTuning2 = 0;
|
Chris@23
|
411 m_localTuning0 = 0;
|
Chris@23
|
412 m_localTuning1 = 0;
|
Chris@23
|
413 m_localTuning2 = 0;
|
Chris@23
|
414 m_localTuning.clear();
|
matthiasm@0
|
415 }
|
matthiasm@0
|
416
|
Chris@35
|
417 void
|
Chris@35
|
418 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
419 {
|
Chris@35
|
420 m_frameCount++;
|
Chris@23
|
421 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
422
|
Chris@23
|
423 const float *fbuf = inputBuffers[0];
|
Chris@23
|
424 float energysum = 0;
|
Chris@23
|
425 // make magnitude
|
Chris@23
|
426 float maxmag = -10000;
|
Chris@23
|
427 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
428 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
429 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
430 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
431 if (m_rollon > 0) {
|
Chris@23
|
432 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
433 }
|
Chris@23
|
434 }
|
matthiasm@14
|
435
|
Chris@23
|
436 float cumenergy = 0;
|
Chris@23
|
437 if (m_rollon > 0) {
|
Chris@23
|
438 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
439 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
440 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
441 else break;
|
Chris@23
|
442 }
|
Chris@23
|
443 }
|
matthiasm@17
|
444
|
Chris@23
|
445 if (maxmag < 2) {
|
Chris@23
|
446 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
447 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
448 magnitude[iBin] = 0;
|
Chris@23
|
449 }
|
Chris@23
|
450 }
|
matthiasm@4
|
451
|
Chris@23
|
452 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
453 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
454 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
455 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
456 }
|
Chris@23
|
457 int binCount = 0;
|
Chris@23
|
458 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
459 // cerr << ".";
|
Chris@23
|
460 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
461 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
462 binCount++;
|
Chris@23
|
463 }
|
Chris@23
|
464 // cerr << nm[20];
|
Chris@23
|
465 // cerr << endl;
|
matthiasm@0
|
466
|
matthiasm@0
|
467
|
Chris@35
|
468 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
469 // update means of complex tuning variables
|
Chris@35
|
470 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
471 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
472 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
473
|
matthiasm@0
|
474 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
475 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
476 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
477 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
478 float ratioOld = 0.997;
|
matthiasm@3
|
479 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
480 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
481 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
482 }
|
matthiasm@0
|
483
|
matthiasm@0
|
484 // if (m_tuneLocal) {
|
Chris@23
|
485 // local tuning
|
Chris@23
|
486 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
487 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
488 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
489 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
490
|
Chris@23
|
491 Feature f1; // logfreqspec
|
Chris@23
|
492 f1.hasTimestamp = true;
|
matthiasm@0
|
493 f1.timestamp = timestamp;
|
Chris@23
|
494 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
495 f1.values.push_back(nm[iNote]);
|
Chris@23
|
496 }
|
matthiasm@0
|
497
|
matthiasm@0
|
498 // deletes
|
matthiasm@0
|
499 delete[] magnitude;
|
matthiasm@0
|
500 delete[] nm;
|
matthiasm@0
|
501
|
Chris@35
|
502 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
503 }
|
matthiasm@0
|
504
|
Chris@35
|
505
|
Chris@35
|
506 #ifdef NOT_DEFINED
|
Chris@35
|
507
|
Chris@35
|
508 NNLSBase::FeatureSet
|
Chris@35
|
509 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
510 {
|
Chris@23
|
511 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
512 FeatureSet fsOut;
|
Chris@35
|
513 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
514 int nChord = m_chordnames.size();
|
Chris@23
|
515 //
|
Chris@23
|
516 /** Calculate Tuning
|
Chris@23
|
517 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
518 cumulative mean real and imag values)
|
Chris@23
|
519 **/
|
Chris@23
|
520 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
521 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
522 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
523 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
524 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
525 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
526
|
Chris@23
|
527 char buffer0 [50];
|
matthiasm@1
|
528
|
Chris@23
|
529 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
530
|
Chris@23
|
531 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
532
|
Chris@23
|
533 // push tuning to FeatureSet fsOut
|
Chris@23
|
534 Feature f0; // tuning
|
Chris@23
|
535 f0.hasTimestamp = true;
|
Chris@23
|
536 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
537 f0.label = buffer0;
|
Chris@23
|
538 fsOut[0].push_back(f0);
|
matthiasm@1
|
539
|
Chris@23
|
540 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
541 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
542 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
543 **/
|
Chris@23
|
544 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
545
|
Chris@23
|
546 float tempValue = 0;
|
Chris@23
|
547 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
548 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
549 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
550 int count = 0;
|
matthiasm@1
|
551
|
Chris@35
|
552 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
553 Feature f1 = *i;
|
Chris@23
|
554 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
555 f2.hasTimestamp = true;
|
Chris@23
|
556 f2.timestamp = f1.timestamp;
|
Chris@23
|
557 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
558
|
mail@60
|
559 if (m_tuneLocal == 1.0) {
|
Chris@23
|
560 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
561 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
562 }
|
matthiasm@1
|
563
|
Chris@23
|
564 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
565
|
Chris@23
|
566 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
567 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
568 f2.values.push_back(tempValue);
|
Chris@23
|
569 }
|
matthiasm@1
|
570
|
Chris@23
|
571 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
572 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
573 vector<float> runningstd;
|
Chris@23
|
574 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
575 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
576 }
|
Chris@23
|
577 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
578 for (int i = 0; i < 256; i++) {
|
Chris@23
|
579 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
580 if (runningstd[i] > 0) {
|
Chris@23
|
581 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
582 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
583 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
584 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
585 }
|
Chris@23
|
586 if (f2.values[i] < 0) {
|
Chris@23
|
587 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
588 }
|
Chris@23
|
589 }
|
Chris@23
|
590 fsOut[2].push_back(f2);
|
Chris@23
|
591 count++;
|
Chris@23
|
592 }
|
Chris@23
|
593 cerr << "done." << endl;
|
matthiasm@1
|
594
|
Chris@23
|
595 /** Semitone spectrum and chromagrams
|
Chris@23
|
596 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
597 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
598 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
599 bass and treble stacked onto each other).
|
Chris@23
|
600 **/
|
matthiasm@42
|
601 if (m_useNNLS == 0) {
|
Chris@23
|
602 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
603 } else {
|
Chris@23
|
604 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
605 }
|
matthiasm@13
|
606
|
matthiasm@1
|
607
|
Chris@23
|
608 vector<vector<float> > chordogram;
|
Chris@23
|
609 vector<vector<int> > scoreChordogram;
|
Chris@23
|
610 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
611 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
612 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
613 count = 0;
|
matthiasm@9
|
614
|
Chris@23
|
615 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
616 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
617 Feature f3; // semitone spectrum
|
Chris@23
|
618 Feature f4; // treble chromagram
|
Chris@23
|
619 Feature f5; // bass chromagram
|
Chris@23
|
620 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
621
|
Chris@23
|
622 f3.hasTimestamp = true;
|
Chris@23
|
623 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
624
|
Chris@23
|
625 f4.hasTimestamp = true;
|
Chris@23
|
626 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
627
|
Chris@23
|
628 f5.hasTimestamp = true;
|
Chris@23
|
629 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
630
|
Chris@23
|
631 f6.hasTimestamp = true;
|
Chris@23
|
632 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
633
|
Chris@29
|
634 float b[256];
|
matthiasm@1
|
635
|
Chris@23
|
636 bool some_b_greater_zero = false;
|
Chris@23
|
637 float sumb = 0;
|
Chris@23
|
638 for (int i = 0; i < 256; i++) {
|
Chris@23
|
639 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
640 b[i] = f2.values[i];
|
Chris@23
|
641 sumb += b[i];
|
Chris@23
|
642 if (b[i] > 0) {
|
Chris@23
|
643 some_b_greater_zero = true;
|
Chris@23
|
644 }
|
Chris@23
|
645 }
|
matthiasm@1
|
646
|
Chris@23
|
647 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
648
|
Chris@23
|
649 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
650 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
651 float currval;
|
Chris@23
|
652 unsigned iSemitone = 0;
|
matthiasm@1
|
653
|
Chris@23
|
654 if (some_b_greater_zero) {
|
matthiasm@42
|
655 if (m_useNNLS == 0) {
|
Chris@23
|
656 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
657 currval = 0;
|
Chris@23
|
658 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
659 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
660 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
661 f3.values.push_back(currval);
|
Chris@23
|
662 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
663 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
664 iSemitone++;
|
Chris@23
|
665 }
|
matthiasm@1
|
666
|
Chris@23
|
667 } else {
|
Chris@29
|
668 float x[84+1000];
|
Chris@23
|
669 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
670 vector<int> signifIndex;
|
Chris@23
|
671 int index=0;
|
Chris@23
|
672 sumb /= 84.0;
|
Chris@23
|
673 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
674 float currval = 0;
|
Chris@23
|
675 currval += b[iNote + 1 + -1];
|
Chris@23
|
676 currval += b[iNote + 1 + 0];
|
Chris@23
|
677 currval += b[iNote + 1 + 1];
|
Chris@23
|
678 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
679 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
680 index++;
|
Chris@23
|
681 }
|
Chris@29
|
682 float rnorm;
|
Chris@29
|
683 float w[84+1000];
|
Chris@29
|
684 float zz[84+1000];
|
Chris@23
|
685 int indx[84+1000];
|
Chris@23
|
686 int mode;
|
Chris@23
|
687 int dictsize = 256*signifIndex.size();
|
Chris@23
|
688 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
689 float *curr_dict = new float[dictsize];
|
Chris@23
|
690 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
691 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
692 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
693 }
|
Chris@23
|
694 }
|
Chris@29
|
695 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
696 delete [] curr_dict;
|
Chris@23
|
697 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
698 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
699 // cerr << mode << endl;
|
Chris@23
|
700 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
701 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
702 }
|
Chris@23
|
703 }
|
Chris@23
|
704 }
|
matthiasm@13
|
705
|
matthiasm@10
|
706
|
matthiasm@12
|
707
|
matthiasm@13
|
708
|
Chris@23
|
709 f4.values = chroma;
|
Chris@23
|
710 f5.values = basschroma;
|
Chris@23
|
711 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
712 f6.values = chroma;
|
matthiasm@1
|
713
|
Chris@23
|
714 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
715 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
716 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
717 case 0: // should never end up here
|
Chris@23
|
718 break;
|
Chris@23
|
719 case 1:
|
Chris@23
|
720 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
721 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
722 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
723 break;
|
Chris@23
|
724 case 2:
|
Chris@23
|
725 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
726 chromanorm[0] += *it;
|
Chris@23
|
727 }
|
Chris@23
|
728 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
729 chromanorm[1] += *it;
|
Chris@23
|
730 }
|
Chris@23
|
731 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
732 chromanorm[2] += *it;
|
Chris@23
|
733 }
|
Chris@23
|
734 break;
|
Chris@23
|
735 case 3:
|
Chris@23
|
736 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
737 chromanorm[0] += pow(*it,2);
|
Chris@23
|
738 }
|
Chris@23
|
739 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
740 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
741 chromanorm[1] += pow(*it,2);
|
Chris@23
|
742 }
|
Chris@23
|
743 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
744 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
745 chromanorm[2] += pow(*it,2);
|
Chris@23
|
746 }
|
Chris@23
|
747 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
748 break;
|
Chris@23
|
749 }
|
Chris@23
|
750 if (chromanorm[0] > 0) {
|
Chris@23
|
751 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
752 f4.values[i] /= chromanorm[0];
|
Chris@23
|
753 }
|
Chris@23
|
754 }
|
Chris@23
|
755 if (chromanorm[1] > 0) {
|
Chris@23
|
756 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
757 f5.values[i] /= chromanorm[1];
|
Chris@23
|
758 }
|
Chris@23
|
759 }
|
Chris@23
|
760 if (chromanorm[2] > 0) {
|
Chris@23
|
761 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
762 f6.values[i] /= chromanorm[2];
|
Chris@23
|
763 }
|
Chris@23
|
764 }
|
matthiasm@13
|
765
|
Chris@23
|
766 }
|
matthiasm@13
|
767
|
Chris@23
|
768 // local chord estimation
|
Chris@23
|
769 vector<float> currentChordSalience;
|
Chris@23
|
770 float tempchordvalue = 0;
|
Chris@23
|
771 float sumchordvalue = 0;
|
matthiasm@9
|
772
|
Chris@23
|
773 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
774 tempchordvalue = 0;
|
Chris@23
|
775 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
776 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
777 }
|
Chris@23
|
778 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
779 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
780 }
|
Chris@23
|
781 sumchordvalue+=tempchordvalue;
|
Chris@23
|
782 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
783 }
|
Chris@23
|
784 if (sumchordvalue > 0) {
|
Chris@23
|
785 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
786 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
787 }
|
Chris@23
|
788 } else {
|
Chris@23
|
789 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
790 }
|
Chris@23
|
791 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
792
|
Chris@23
|
793 fsOut[3].push_back(f3);
|
Chris@23
|
794 fsOut[4].push_back(f4);
|
Chris@23
|
795 fsOut[5].push_back(f5);
|
Chris@23
|
796 fsOut[6].push_back(f6);
|
Chris@23
|
797 count++;
|
Chris@23
|
798 }
|
Chris@23
|
799 cerr << "done." << endl;
|
matthiasm@13
|
800
|
matthiasm@10
|
801
|
Chris@23
|
802 /* Simple chord estimation
|
Chris@23
|
803 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
804 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
805 */
|
Chris@23
|
806 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
807 count = 0;
|
Chris@23
|
808 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
809 vector<int> chordSequence;
|
Chris@23
|
810 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
811 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
812 scoreChordogram.push_back(temp);
|
Chris@23
|
813 }
|
Chris@23
|
814 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
815 int startIndex = count + 1;
|
Chris@23
|
816 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
817
|
Chris@23
|
818 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
819
|
Chris@23
|
820 vector<int> chordCandidates;
|
Chris@23
|
821 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
822 // float currsum = 0;
|
Chris@23
|
823 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
824 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
825 // }
|
Chris@23
|
826 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
827 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
828 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
829 chordCandidates.push_back(iChord);
|
Chris@23
|
830 break;
|
Chris@23
|
831 }
|
Chris@23
|
832 }
|
Chris@23
|
833 }
|
Chris@23
|
834 chordCandidates.push_back(nChord-1);
|
Chris@23
|
835 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
836
|
Chris@23
|
837 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
838 float maxindex = 0; //... and the index thereof
|
Chris@23
|
839 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
840 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
841
|
Chris@23
|
842 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
843 // now find the max values on both sides of iWF
|
Chris@23
|
844 // left side:
|
Chris@23
|
845 float maxL = 0;
|
Chris@23
|
846 unsigned maxindL = nChord-1;
|
Chris@23
|
847 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
848 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
849 float currsum = 0;
|
Chris@23
|
850 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
851 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
852 }
|
Chris@23
|
853 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
854 if (currsum > maxL) {
|
Chris@23
|
855 maxL = currsum;
|
Chris@23
|
856 maxindL = iChord;
|
Chris@23
|
857 }
|
Chris@23
|
858 }
|
Chris@23
|
859 // right side:
|
Chris@23
|
860 float maxR = 0;
|
Chris@23
|
861 unsigned maxindR = nChord-1;
|
Chris@23
|
862 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
863 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
864 float currsum = 0;
|
Chris@23
|
865 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
866 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
867 }
|
Chris@23
|
868 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
869 if (currsum > maxR) {
|
Chris@23
|
870 maxR = currsum;
|
Chris@23
|
871 maxindR = iChord;
|
Chris@23
|
872 }
|
Chris@23
|
873 }
|
Chris@23
|
874 if (maxL+maxR > maxval) {
|
Chris@23
|
875 maxval = maxL+maxR;
|
Chris@23
|
876 maxindex = iWF;
|
Chris@23
|
877 bestchordL = maxindL;
|
Chris@23
|
878 bestchordR = maxindR;
|
Chris@23
|
879 }
|
matthiasm@3
|
880
|
Chris@23
|
881 }
|
Chris@23
|
882 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
883 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
884 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
885 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
886 }
|
Chris@23
|
887 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
888 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
889 }
|
Chris@23
|
890 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
891 count++;
|
Chris@23
|
892 }
|
Chris@23
|
893 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
894 count = 0;
|
Chris@23
|
895 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
896 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
897 float maxindex = 0; //... and the index thereof
|
Chris@23
|
898 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
899 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
900 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
901 maxindex = iChord;
|
Chris@23
|
902 // cerr << iChord << endl;
|
Chris@23
|
903 }
|
Chris@23
|
904 }
|
Chris@23
|
905 chordSequence.push_back(maxindex);
|
Chris@23
|
906 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
907 count++;
|
Chris@23
|
908 }
|
Chris@23
|
909 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
910
|
matthiasm@3
|
911
|
Chris@23
|
912 // mode filter on chordSequence
|
Chris@23
|
913 count = 0;
|
Chris@23
|
914 string oldChord = "";
|
Chris@23
|
915 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
916 Feature f6 = *it;
|
Chris@23
|
917 Feature f7; // chord estimate
|
Chris@23
|
918 f7.hasTimestamp = true;
|
Chris@23
|
919 f7.timestamp = f6.timestamp;
|
Chris@23
|
920 Feature f8; // chord estimate
|
Chris@23
|
921 f8.hasTimestamp = true;
|
Chris@23
|
922 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
923
|
Chris@23
|
924 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
925 int maxChordCount = 0;
|
Chris@23
|
926 int maxChordIndex = nChord-1;
|
Chris@23
|
927 string maxChord;
|
Chris@23
|
928 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
929 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
930 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
931 chordCount[chordSequence[i]]++;
|
Chris@23
|
932 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
933 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
934 maxChordCount++;
|
Chris@23
|
935 maxChordIndex = chordSequence[i];
|
Chris@23
|
936 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
937 }
|
Chris@23
|
938 }
|
Chris@23
|
939 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
940 // cerr << maxChordIndex << endl;
|
Chris@23
|
941 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
942 // cerr << chordchange[count] << endl;
|
Chris@23
|
943 fsOut[9].push_back(f8);
|
Chris@23
|
944 if (oldChord != maxChord) {
|
Chris@23
|
945 oldChord = maxChord;
|
matthiasm@3
|
946
|
Chris@23
|
947 // char buffer1 [50];
|
Chris@23
|
948 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
949 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
950 // } else {
|
Chris@23
|
951 // sprintf(buffer1, "N");
|
Chris@23
|
952 // }
|
Chris@23
|
953 // f7.label = buffer1;
|
Chris@23
|
954 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
955 fsOut[7].push_back(f7);
|
Chris@23
|
956 }
|
Chris@23
|
957 count++;
|
Chris@23
|
958 }
|
Chris@23
|
959 Feature f7; // last chord estimate
|
Chris@23
|
960 f7.hasTimestamp = true;
|
Chris@23
|
961 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
962 f7.label = "N";
|
Chris@23
|
963 fsOut[7].push_back(f7);
|
Chris@23
|
964 cerr << "done." << endl;
|
Chris@23
|
965 // // musicity
|
Chris@23
|
966 // count = 0;
|
Chris@23
|
967 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
968 // vector<float> musicityValue;
|
Chris@23
|
969 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
970 // Feature f4 = *it;
|
Chris@23
|
971 //
|
Chris@23
|
972 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
973 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
974 // float chromasum = 0;
|
Chris@23
|
975 // float diffsum = 0;
|
Chris@23
|
976 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
977 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
978 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
979 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
980 // }
|
Chris@23
|
981 // }
|
Chris@23
|
982 // diffsum /= chromasum;
|
Chris@23
|
983 // musicityValue.push_back(diffsum);
|
Chris@23
|
984 // count++;
|
Chris@23
|
985 // }
|
Chris@23
|
986 //
|
Chris@23
|
987 // float musicityThreshold = 0.44;
|
Chris@23
|
988 // if (m_stepSize == 4096) {
|
Chris@23
|
989 // musicityThreshold = 0.74;
|
Chris@23
|
990 // }
|
Chris@23
|
991 // if (m_stepSize == 4410) {
|
Chris@23
|
992 // musicityThreshold = 0.77;
|
Chris@23
|
993 // }
|
Chris@23
|
994 //
|
Chris@23
|
995 // count = 0;
|
Chris@23
|
996 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
997 // Feature f4 = *it;
|
Chris@23
|
998 // Feature f8; // musicity
|
Chris@23
|
999 // Feature f9; // musicity segmenter
|
Chris@23
|
1000 //
|
Chris@23
|
1001 // f8.hasTimestamp = true;
|
Chris@23
|
1002 // f8.timestamp = f4.timestamp;
|
Chris@23
|
1003 // f9.hasTimestamp = true;
|
Chris@23
|
1004 // f9.timestamp = f4.timestamp;
|
Chris@23
|
1005 //
|
Chris@23
|
1006 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1007 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1008 // int musicityCount = 0;
|
Chris@23
|
1009 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
1010 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1011 // }
|
Chris@23
|
1012 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1013 //
|
Chris@23
|
1014 // if (isSpeech) {
|
Chris@23
|
1015 // if (oldlabeltype != 2) {
|
Chris@23
|
1016 // f9.label = "Speech";
|
Chris@23
|
1017 // fsOut[9].push_back(f9);
|
Chris@23
|
1018 // oldlabeltype = 2;
|
Chris@23
|
1019 // }
|
Chris@23
|
1020 // } else {
|
Chris@23
|
1021 // if (oldlabeltype != 1) {
|
Chris@23
|
1022 // f9.label = "Music";
|
Chris@23
|
1023 // fsOut[9].push_back(f9);
|
Chris@23
|
1024 // oldlabeltype = 1;
|
Chris@23
|
1025 // }
|
Chris@23
|
1026 // }
|
Chris@23
|
1027 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1028 // fsOut[8].push_back(f8);
|
Chris@23
|
1029 // count++;
|
Chris@23
|
1030 // }
|
Chris@23
|
1031 return fsOut;
|
matthiasm@0
|
1032
|
matthiasm@0
|
1033 }
|
matthiasm@0
|
1034
|
Chris@35
|
1035 #endif
|