Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
29 const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@27
|
31 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
32
|
Chris@35
|
33 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
34 Plugin(inputSampleRate),
|
Chris@35
|
35 m_logSpectrum(0),
|
Chris@23
|
36 m_blockSize(0),
|
Chris@23
|
37 m_stepSize(0),
|
Chris@23
|
38 m_lengthOfNoteIndex(0),
|
Chris@23
|
39 m_meanTuning0(0),
|
Chris@23
|
40 m_meanTuning1(0),
|
Chris@23
|
41 m_meanTuning2(0),
|
Chris@23
|
42 m_localTuning0(0),
|
Chris@23
|
43 m_localTuning1(0),
|
Chris@23
|
44 m_localTuning2(0),
|
Chris@23
|
45 m_paling(1.0),
|
Chris@23
|
46 m_preset(0.0),
|
Chris@23
|
47 m_localTuning(0),
|
Chris@23
|
48 m_kernelValue(0),
|
Chris@23
|
49 m_kernelFftIndex(0),
|
Chris@23
|
50 m_kernelNoteIndex(0),
|
Chris@23
|
51 m_dict(0),
|
Chris@23
|
52 m_tuneLocal(false),
|
Chris@23
|
53 m_dictID(0),
|
Chris@23
|
54 m_chorddict(0),
|
Chris@23
|
55 m_chordnames(0),
|
Chris@23
|
56 m_doNormalizeChroma(0),
|
Chris@23
|
57 m_rollon(0.01)
|
matthiasm@0
|
58 {
|
Chris@35
|
59 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
60
|
Chris@23
|
61 // make the *note* dictionary matrix
|
Chris@23
|
62 m_dict = new float[nNote * 84];
|
Chris@23
|
63 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
Chris@23
|
64 dictionaryMatrix(m_dict);
|
matthiasm@7
|
65
|
Chris@23
|
66 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
67 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70
|
Chris@35
|
71 NNLSBase::~NNLSBase()
|
matthiasm@0
|
72 {
|
Chris@35
|
73 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
74 delete [] m_dict;
|
matthiasm@0
|
75 }
|
matthiasm@0
|
76
|
matthiasm@0
|
77 string
|
Chris@35
|
78 NNLSBase::getMaker() const
|
matthiasm@0
|
79 {
|
Chris@23
|
80 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
81 // Your name here
|
matthiasm@0
|
82 return "Matthias Mauch";
|
matthiasm@0
|
83 }
|
matthiasm@0
|
84
|
matthiasm@0
|
85 int
|
Chris@35
|
86 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
87 {
|
Chris@23
|
88 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
89 // Increment this each time you release a version that behaves
|
matthiasm@0
|
90 // differently from the previous one
|
matthiasm@0
|
91 return 1;
|
matthiasm@0
|
92 }
|
matthiasm@0
|
93
|
matthiasm@0
|
94 string
|
Chris@35
|
95 NNLSBase::getCopyright() const
|
matthiasm@0
|
96 {
|
Chris@23
|
97 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
98 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
99 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
100 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
101 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
102 return "GPL";
|
matthiasm@0
|
103 }
|
matthiasm@0
|
104
|
Chris@35
|
105 NNLSBase::InputDomain
|
Chris@35
|
106 NNLSBase::getInputDomain() const
|
matthiasm@0
|
107 {
|
Chris@23
|
108 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
109 return FrequencyDomain;
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 size_t
|
Chris@35
|
113 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
114 {
|
Chris@23
|
115 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
116 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
117 }
|
matthiasm@0
|
118
|
matthiasm@0
|
119 size_t
|
Chris@35
|
120 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
121 {
|
Chris@23
|
122 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
123 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
124 // means the same as the block size for TimeDomain
|
Chris@23
|
125 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
126 }
|
matthiasm@0
|
127
|
matthiasm@0
|
128 size_t
|
Chris@35
|
129 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
130 {
|
Chris@23
|
131 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
132 return 1;
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134
|
matthiasm@0
|
135 size_t
|
Chris@35
|
136 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
137 {
|
Chris@23
|
138 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
139 return 1;
|
matthiasm@0
|
140 }
|
matthiasm@0
|
141
|
Chris@35
|
142 NNLSBase::ParameterList
|
Chris@35
|
143 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
144 {
|
Chris@23
|
145 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
146 ParameterList list;
|
matthiasm@0
|
147
|
matthiasm@3
|
148 ParameterDescriptor d3;
|
matthiasm@3
|
149 d3.identifier = "preset";
|
matthiasm@3
|
150 d3.name = "preset";
|
matthiasm@3
|
151 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
152 d3.unit = "";
|
Chris@23
|
153 d3.isQuantized = true;
|
Chris@23
|
154 d3.quantizeStep = 1;
|
matthiasm@3
|
155 d3.minValue = 0.0;
|
matthiasm@4
|
156 d3.maxValue = 3.0;
|
matthiasm@3
|
157 d3.defaultValue = 0.0;
|
matthiasm@3
|
158 d3.valueNames.push_back("polyphonic pop");
|
Chris@23
|
159 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
160 d3.valueNames.push_back("solo keyboard");
|
Chris@23
|
161 d3.valueNames.push_back("manual");
|
matthiasm@3
|
162 list.push_back(d3);
|
matthiasm@4
|
163
|
matthiasm@17
|
164 ParameterDescriptor d5;
|
Chris@23
|
165 d5.identifier = "rollon";
|
Chris@23
|
166 d5.name = "spectral roll-on";
|
Chris@23
|
167 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
Chris@23
|
168 d5.unit = "";
|
Chris@23
|
169 d5.minValue = 0;
|
Chris@23
|
170 d5.maxValue = 1;
|
Chris@23
|
171 d5.defaultValue = 0;
|
Chris@23
|
172 d5.isQuantized = false;
|
Chris@23
|
173 list.push_back(d5);
|
matthiasm@17
|
174
|
matthiasm@4
|
175 // ParameterDescriptor d0;
|
matthiasm@4
|
176 // d0.identifier = "notedict";
|
matthiasm@4
|
177 // d0.name = "note dictionary";
|
matthiasm@4
|
178 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
179 // d0.unit = "";
|
matthiasm@4
|
180 // d0.minValue = 0;
|
matthiasm@4
|
181 // d0.maxValue = 1;
|
matthiasm@4
|
182 // d0.defaultValue = 0;
|
matthiasm@4
|
183 // d0.isQuantized = true;
|
matthiasm@4
|
184 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
185 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
186 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
187 // list.push_back(d0);
|
matthiasm@4
|
188
|
matthiasm@4
|
189 ParameterDescriptor d1;
|
matthiasm@4
|
190 d1.identifier = "tuningmode";
|
matthiasm@4
|
191 d1.name = "tuning mode";
|
matthiasm@4
|
192 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
193 d1.unit = "";
|
matthiasm@4
|
194 d1.minValue = 0;
|
matthiasm@4
|
195 d1.maxValue = 1;
|
matthiasm@4
|
196 d1.defaultValue = 0;
|
matthiasm@4
|
197 d1.isQuantized = true;
|
matthiasm@4
|
198 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
199 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
200 d1.quantizeStep = 1.0;
|
matthiasm@4
|
201 list.push_back(d1);
|
matthiasm@4
|
202
|
Chris@23
|
203 // ParameterDescriptor d2;
|
Chris@23
|
204 // d2.identifier = "paling";
|
Chris@23
|
205 // d2.name = "spectral paling";
|
Chris@23
|
206 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
Chris@23
|
207 // d2.unit = "";
|
Chris@23
|
208 // d2.isQuantized = true;
|
Chris@23
|
209 // // d2.quantizeStep = 0.1;
|
Chris@23
|
210 // d2.minValue = 0.0;
|
Chris@23
|
211 // d2.maxValue = 1.0;
|
Chris@23
|
212 // d2.defaultValue = 1.0;
|
Chris@23
|
213 // d2.isQuantized = false;
|
Chris@23
|
214 // list.push_back(d2);
|
Chris@23
|
215 ParameterDescriptor d4;
|
matthiasm@12
|
216 d4.identifier = "chromanormalize";
|
matthiasm@12
|
217 d4.name = "chroma normalization";
|
matthiasm@12
|
218 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
219 d4.unit = "";
|
matthiasm@12
|
220 d4.minValue = 0;
|
matthiasm@13
|
221 d4.maxValue = 3;
|
matthiasm@12
|
222 d4.defaultValue = 0;
|
matthiasm@12
|
223 d4.isQuantized = true;
|
matthiasm@13
|
224 d4.valueNames.push_back("none");
|
matthiasm@13
|
225 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
226 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
227 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
228 d4.quantizeStep = 1.0;
|
matthiasm@12
|
229 list.push_back(d4);
|
matthiasm@4
|
230
|
matthiasm@0
|
231 return list;
|
matthiasm@0
|
232 }
|
matthiasm@0
|
233
|
matthiasm@0
|
234 float
|
Chris@35
|
235 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
236 {
|
Chris@23
|
237 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
238 if (identifier == "notedict") {
|
matthiasm@0
|
239 return m_dictID;
|
matthiasm@0
|
240 }
|
matthiasm@0
|
241
|
matthiasm@0
|
242 if (identifier == "paling") {
|
matthiasm@0
|
243 return m_paling;
|
matthiasm@0
|
244 }
|
matthiasm@17
|
245
|
Chris@23
|
246 if (identifier == "rollon") {
|
matthiasm@17
|
247 return m_rollon;
|
matthiasm@17
|
248 }
|
matthiasm@0
|
249
|
matthiasm@0
|
250 if (identifier == "tuningmode") {
|
matthiasm@0
|
251 if (m_tuneLocal) {
|
matthiasm@0
|
252 return 1.0;
|
matthiasm@0
|
253 } else {
|
matthiasm@0
|
254 return 0.0;
|
matthiasm@0
|
255 }
|
matthiasm@0
|
256 }
|
Chris@23
|
257 if (identifier == "preset") {
|
Chris@23
|
258 return m_preset;
|
matthiasm@3
|
259 }
|
Chris@23
|
260 if (identifier == "chromanormalize") {
|
Chris@23
|
261 return m_doNormalizeChroma;
|
matthiasm@12
|
262 }
|
matthiasm@0
|
263 return 0;
|
matthiasm@0
|
264
|
matthiasm@0
|
265 }
|
matthiasm@0
|
266
|
matthiasm@0
|
267 void
|
Chris@35
|
268 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
269 {
|
Chris@23
|
270 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
271 if (identifier == "notedict") {
|
matthiasm@0
|
272 m_dictID = (int) value;
|
matthiasm@0
|
273 }
|
matthiasm@0
|
274
|
matthiasm@0
|
275 if (identifier == "paling") {
|
matthiasm@0
|
276 m_paling = value;
|
matthiasm@0
|
277 }
|
matthiasm@0
|
278
|
matthiasm@0
|
279 if (identifier == "tuningmode") {
|
matthiasm@0
|
280 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
281 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
282 }
|
matthiasm@3
|
283 if (identifier == "preset") {
|
matthiasm@3
|
284 m_preset = value;
|
Chris@23
|
285 if (m_preset == 0.0) {
|
Chris@23
|
286 m_tuneLocal = false;
|
Chris@23
|
287 m_paling = 1.0;
|
Chris@23
|
288 m_dictID = 0.0;
|
Chris@23
|
289 }
|
Chris@23
|
290 if (m_preset == 1.0) {
|
Chris@23
|
291 m_tuneLocal = false;
|
Chris@23
|
292 m_paling = 1.0;
|
Chris@23
|
293 m_dictID = 1.0;
|
Chris@23
|
294 }
|
Chris@23
|
295 if (m_preset == 2.0) {
|
Chris@23
|
296 m_tuneLocal = false;
|
Chris@23
|
297 m_paling = 0.7;
|
Chris@23
|
298 m_dictID = 0.0;
|
Chris@23
|
299 }
|
matthiasm@3
|
300 }
|
Chris@23
|
301 if (identifier == "chromanormalize") {
|
Chris@23
|
302 m_doNormalizeChroma = value;
|
Chris@23
|
303 }
|
matthiasm@17
|
304
|
Chris@23
|
305 if (identifier == "rollon") {
|
Chris@23
|
306 m_rollon = value;
|
Chris@23
|
307 }
|
matthiasm@0
|
308 }
|
matthiasm@0
|
309
|
Chris@35
|
310 NNLSBase::ProgramList
|
Chris@35
|
311 NNLSBase::getPrograms() const
|
matthiasm@0
|
312 {
|
Chris@23
|
313 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
314 ProgramList list;
|
matthiasm@0
|
315
|
matthiasm@0
|
316 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
317 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
318
|
matthiasm@0
|
319 return list;
|
matthiasm@0
|
320 }
|
matthiasm@0
|
321
|
matthiasm@0
|
322 string
|
Chris@35
|
323 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
324 {
|
Chris@23
|
325 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
326 return ""; // no programs
|
matthiasm@0
|
327 }
|
matthiasm@0
|
328
|
matthiasm@0
|
329 void
|
Chris@35
|
330 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
331 {
|
Chris@23
|
332 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
333 }
|
matthiasm@0
|
334
|
matthiasm@0
|
335
|
matthiasm@0
|
336 bool
|
Chris@35
|
337 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
338 {
|
Chris@23
|
339 if (debug_on) {
|
Chris@23
|
340 cerr << "--> initialise";
|
Chris@23
|
341 }
|
matthiasm@1
|
342
|
matthiasm@0
|
343 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
344 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
345 m_blockSize = blockSize;
|
matthiasm@0
|
346 m_stepSize = stepSize;
|
Chris@35
|
347 m_frameCount = 0;
|
Chris@23
|
348 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
349 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
350 float *tempkernel;
|
matthiasm@1
|
351
|
Chris@23
|
352 tempkernel = new float[tempn];
|
matthiasm@1
|
353
|
Chris@23
|
354 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
355 m_kernelValue.clear();
|
Chris@23
|
356 m_kernelFftIndex.clear();
|
Chris@23
|
357 m_kernelNoteIndex.clear();
|
Chris@23
|
358 int countNonzero = 0;
|
Chris@23
|
359 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
360 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
361 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
362 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
363 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
364 countNonzero++;
|
Chris@23
|
365 }
|
Chris@23
|
366 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
367 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
368 }
|
Chris@23
|
369 }
|
Chris@23
|
370 }
|
Chris@23
|
371 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
372 delete [] tempkernel;
|
Chris@35
|
373 /*
|
Chris@23
|
374 ofstream myfile;
|
Chris@23
|
375 myfile.open ("matrix.txt");
|
matthiasm@3
|
376 // myfile << "Writing this to a file.\n";
|
Chris@23
|
377 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
378 myfile << m_dict[i] << endl;
|
Chris@23
|
379 }
|
matthiasm@3
|
380 myfile.close();
|
Chris@35
|
381 */
|
matthiasm@0
|
382 return true;
|
matthiasm@0
|
383 }
|
matthiasm@0
|
384
|
matthiasm@0
|
385 void
|
Chris@35
|
386 NNLSBase::reset()
|
matthiasm@0
|
387 {
|
Chris@23
|
388 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
389
|
matthiasm@0
|
390 // Clear buffers, reset stored values, etc
|
Chris@35
|
391 m_frameCount = 0;
|
Chris@23
|
392 m_dictID = 0;
|
Chris@35
|
393 m_logSpectrum.clear();
|
Chris@23
|
394 m_meanTuning0 = 0;
|
Chris@23
|
395 m_meanTuning1 = 0;
|
Chris@23
|
396 m_meanTuning2 = 0;
|
Chris@23
|
397 m_localTuning0 = 0;
|
Chris@23
|
398 m_localTuning1 = 0;
|
Chris@23
|
399 m_localTuning2 = 0;
|
Chris@23
|
400 m_localTuning.clear();
|
matthiasm@0
|
401 }
|
matthiasm@0
|
402
|
Chris@35
|
403 void
|
Chris@35
|
404 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
405 {
|
Chris@35
|
406 m_frameCount++;
|
Chris@23
|
407 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
408
|
Chris@23
|
409 const float *fbuf = inputBuffers[0];
|
Chris@23
|
410 float energysum = 0;
|
Chris@23
|
411 // make magnitude
|
Chris@23
|
412 float maxmag = -10000;
|
Chris@23
|
413 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
414 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
415 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
416 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
417 if (m_rollon > 0) {
|
Chris@23
|
418 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
419 }
|
Chris@23
|
420 }
|
matthiasm@14
|
421
|
Chris@23
|
422 float cumenergy = 0;
|
Chris@23
|
423 if (m_rollon > 0) {
|
Chris@23
|
424 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
425 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
426 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
427 else break;
|
Chris@23
|
428 }
|
Chris@23
|
429 }
|
matthiasm@17
|
430
|
Chris@23
|
431 if (maxmag < 2) {
|
Chris@23
|
432 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
433 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
434 magnitude[iBin] = 0;
|
Chris@23
|
435 }
|
Chris@23
|
436 }
|
matthiasm@4
|
437
|
Chris@23
|
438 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
439 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
440 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
441 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
442 }
|
Chris@23
|
443 int binCount = 0;
|
Chris@23
|
444 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
445 // cerr << ".";
|
Chris@23
|
446 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
447 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
448 binCount++;
|
Chris@23
|
449 }
|
Chris@23
|
450 // cerr << nm[20];
|
Chris@23
|
451 // cerr << endl;
|
matthiasm@0
|
452
|
matthiasm@0
|
453
|
Chris@35
|
454 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
455 // update means of complex tuning variables
|
Chris@35
|
456 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
457 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
458 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
459
|
matthiasm@0
|
460 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
461 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
462 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
463 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
464 float ratioOld = 0.997;
|
matthiasm@3
|
465 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
466 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
467 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
468 }
|
matthiasm@0
|
469
|
matthiasm@0
|
470 // if (m_tuneLocal) {
|
Chris@23
|
471 // local tuning
|
Chris@23
|
472 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
473 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
474 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
475 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
476
|
Chris@23
|
477 Feature f1; // logfreqspec
|
Chris@23
|
478 f1.hasTimestamp = true;
|
matthiasm@0
|
479 f1.timestamp = timestamp;
|
Chris@23
|
480 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
481 f1.values.push_back(nm[iNote]);
|
Chris@23
|
482 }
|
matthiasm@0
|
483
|
matthiasm@0
|
484 // deletes
|
matthiasm@0
|
485 delete[] magnitude;
|
matthiasm@0
|
486 delete[] nm;
|
matthiasm@0
|
487
|
Chris@35
|
488 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
489 }
|
matthiasm@0
|
490
|
Chris@35
|
491
|
Chris@35
|
492 #ifdef NOT_DEFINED
|
Chris@35
|
493
|
Chris@35
|
494 NNLSBase::FeatureSet
|
Chris@35
|
495 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
496 {
|
Chris@23
|
497 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
498 FeatureSet fsOut;
|
Chris@35
|
499 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
500 int nChord = m_chordnames.size();
|
Chris@23
|
501 //
|
Chris@23
|
502 /** Calculate Tuning
|
Chris@23
|
503 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
504 cumulative mean real and imag values)
|
Chris@23
|
505 **/
|
Chris@23
|
506 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
507 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
508 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
509 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
510 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
511 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
512
|
Chris@23
|
513 char buffer0 [50];
|
matthiasm@1
|
514
|
Chris@23
|
515 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
516
|
Chris@23
|
517 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
518
|
Chris@23
|
519 // push tuning to FeatureSet fsOut
|
Chris@23
|
520 Feature f0; // tuning
|
Chris@23
|
521 f0.hasTimestamp = true;
|
Chris@23
|
522 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
523 f0.label = buffer0;
|
Chris@23
|
524 fsOut[0].push_back(f0);
|
matthiasm@1
|
525
|
Chris@23
|
526 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
527 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
528 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
529 **/
|
Chris@23
|
530 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
531
|
Chris@23
|
532 float tempValue = 0;
|
Chris@23
|
533 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
534 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
535 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
536 int count = 0;
|
matthiasm@1
|
537
|
Chris@35
|
538 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
539 Feature f1 = *i;
|
Chris@23
|
540 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
541 f2.hasTimestamp = true;
|
Chris@23
|
542 f2.timestamp = f1.timestamp;
|
Chris@23
|
543 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
544
|
Chris@23
|
545 if (m_tuneLocal) {
|
Chris@23
|
546 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
547 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
548 }
|
matthiasm@1
|
549
|
Chris@23
|
550 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
551
|
Chris@23
|
552 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
553 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
554 f2.values.push_back(tempValue);
|
Chris@23
|
555 }
|
matthiasm@1
|
556
|
Chris@23
|
557 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
558 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
559 vector<float> runningstd;
|
Chris@23
|
560 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
561 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
562 }
|
Chris@23
|
563 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
564 for (int i = 0; i < 256; i++) {
|
Chris@23
|
565 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
566 if (runningstd[i] > 0) {
|
Chris@23
|
567 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
Chris@23
|
568 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
569 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
Chris@23
|
570 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
571 }
|
Chris@23
|
572 if (f2.values[i] < 0) {
|
Chris@23
|
573 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
574 }
|
Chris@23
|
575 }
|
Chris@23
|
576 fsOut[2].push_back(f2);
|
Chris@23
|
577 count++;
|
Chris@23
|
578 }
|
Chris@23
|
579 cerr << "done." << endl;
|
matthiasm@1
|
580
|
Chris@23
|
581 /** Semitone spectrum and chromagrams
|
Chris@23
|
582 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
583 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
584 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
585 bass and treble stacked onto each other).
|
Chris@23
|
586 **/
|
Chris@23
|
587 if (m_dictID == 1) {
|
Chris@23
|
588 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
589 } else {
|
Chris@23
|
590 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
591 }
|
matthiasm@13
|
592
|
matthiasm@1
|
593
|
Chris@23
|
594 vector<vector<float> > chordogram;
|
Chris@23
|
595 vector<vector<int> > scoreChordogram;
|
Chris@23
|
596 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
597 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
598 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
599 count = 0;
|
matthiasm@9
|
600
|
Chris@23
|
601 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
602 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
603 Feature f3; // semitone spectrum
|
Chris@23
|
604 Feature f4; // treble chromagram
|
Chris@23
|
605 Feature f5; // bass chromagram
|
Chris@23
|
606 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
607
|
Chris@23
|
608 f3.hasTimestamp = true;
|
Chris@23
|
609 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
610
|
Chris@23
|
611 f4.hasTimestamp = true;
|
Chris@23
|
612 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
613
|
Chris@23
|
614 f5.hasTimestamp = true;
|
Chris@23
|
615 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
616
|
Chris@23
|
617 f6.hasTimestamp = true;
|
Chris@23
|
618 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
619
|
Chris@29
|
620 float b[256];
|
matthiasm@1
|
621
|
Chris@23
|
622 bool some_b_greater_zero = false;
|
Chris@23
|
623 float sumb = 0;
|
Chris@23
|
624 for (int i = 0; i < 256; i++) {
|
Chris@23
|
625 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
626 b[i] = f2.values[i];
|
Chris@23
|
627 sumb += b[i];
|
Chris@23
|
628 if (b[i] > 0) {
|
Chris@23
|
629 some_b_greater_zero = true;
|
Chris@23
|
630 }
|
Chris@23
|
631 }
|
matthiasm@1
|
632
|
Chris@23
|
633 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
634
|
Chris@23
|
635 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
636 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
637 float currval;
|
Chris@23
|
638 unsigned iSemitone = 0;
|
matthiasm@1
|
639
|
Chris@23
|
640 if (some_b_greater_zero) {
|
Chris@23
|
641 if (m_dictID == 1) {
|
Chris@23
|
642 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
643 currval = 0;
|
Chris@23
|
644 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
645 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
646 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
647 f3.values.push_back(currval);
|
Chris@23
|
648 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
649 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
650 iSemitone++;
|
Chris@23
|
651 }
|
matthiasm@1
|
652
|
Chris@23
|
653 } else {
|
Chris@29
|
654 float x[84+1000];
|
Chris@23
|
655 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
656 vector<int> signifIndex;
|
Chris@23
|
657 int index=0;
|
Chris@23
|
658 sumb /= 84.0;
|
Chris@23
|
659 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
660 float currval = 0;
|
Chris@23
|
661 currval += b[iNote + 1 + -1];
|
Chris@23
|
662 currval += b[iNote + 1 + 0];
|
Chris@23
|
663 currval += b[iNote + 1 + 1];
|
Chris@23
|
664 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
665 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
666 index++;
|
Chris@23
|
667 }
|
Chris@29
|
668 float rnorm;
|
Chris@29
|
669 float w[84+1000];
|
Chris@29
|
670 float zz[84+1000];
|
Chris@23
|
671 int indx[84+1000];
|
Chris@23
|
672 int mode;
|
Chris@23
|
673 int dictsize = 256*signifIndex.size();
|
Chris@23
|
674 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
675 float *curr_dict = new float[dictsize];
|
Chris@23
|
676 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
677 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
678 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
679 }
|
Chris@23
|
680 }
|
Chris@29
|
681 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
682 delete [] curr_dict;
|
Chris@23
|
683 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
684 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
685 // cerr << mode << endl;
|
Chris@23
|
686 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
687 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
688 }
|
Chris@23
|
689 }
|
Chris@23
|
690 }
|
matthiasm@13
|
691
|
matthiasm@10
|
692
|
matthiasm@12
|
693
|
matthiasm@13
|
694
|
Chris@23
|
695 f4.values = chroma;
|
Chris@23
|
696 f5.values = basschroma;
|
Chris@23
|
697 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
698 f6.values = chroma;
|
matthiasm@1
|
699
|
Chris@23
|
700 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
701 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
702 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
703 case 0: // should never end up here
|
Chris@23
|
704 break;
|
Chris@23
|
705 case 1:
|
Chris@23
|
706 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
707 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
708 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
709 break;
|
Chris@23
|
710 case 2:
|
Chris@23
|
711 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
712 chromanorm[0] += *it;
|
Chris@23
|
713 }
|
Chris@23
|
714 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
715 chromanorm[1] += *it;
|
Chris@23
|
716 }
|
Chris@23
|
717 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
718 chromanorm[2] += *it;
|
Chris@23
|
719 }
|
Chris@23
|
720 break;
|
Chris@23
|
721 case 3:
|
Chris@23
|
722 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
723 chromanorm[0] += pow(*it,2);
|
Chris@23
|
724 }
|
Chris@23
|
725 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
726 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
727 chromanorm[1] += pow(*it,2);
|
Chris@23
|
728 }
|
Chris@23
|
729 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
730 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
731 chromanorm[2] += pow(*it,2);
|
Chris@23
|
732 }
|
Chris@23
|
733 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
734 break;
|
Chris@23
|
735 }
|
Chris@23
|
736 if (chromanorm[0] > 0) {
|
Chris@23
|
737 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
738 f4.values[i] /= chromanorm[0];
|
Chris@23
|
739 }
|
Chris@23
|
740 }
|
Chris@23
|
741 if (chromanorm[1] > 0) {
|
Chris@23
|
742 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
743 f5.values[i] /= chromanorm[1];
|
Chris@23
|
744 }
|
Chris@23
|
745 }
|
Chris@23
|
746 if (chromanorm[2] > 0) {
|
Chris@23
|
747 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
748 f6.values[i] /= chromanorm[2];
|
Chris@23
|
749 }
|
Chris@23
|
750 }
|
matthiasm@13
|
751
|
Chris@23
|
752 }
|
matthiasm@13
|
753
|
Chris@23
|
754 // local chord estimation
|
Chris@23
|
755 vector<float> currentChordSalience;
|
Chris@23
|
756 float tempchordvalue = 0;
|
Chris@23
|
757 float sumchordvalue = 0;
|
matthiasm@9
|
758
|
Chris@23
|
759 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
760 tempchordvalue = 0;
|
Chris@23
|
761 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
762 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
763 }
|
Chris@23
|
764 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
765 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
766 }
|
Chris@23
|
767 sumchordvalue+=tempchordvalue;
|
Chris@23
|
768 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
769 }
|
Chris@23
|
770 if (sumchordvalue > 0) {
|
Chris@23
|
771 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
772 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
773 }
|
Chris@23
|
774 } else {
|
Chris@23
|
775 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
776 }
|
Chris@23
|
777 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
778
|
Chris@23
|
779 fsOut[3].push_back(f3);
|
Chris@23
|
780 fsOut[4].push_back(f4);
|
Chris@23
|
781 fsOut[5].push_back(f5);
|
Chris@23
|
782 fsOut[6].push_back(f6);
|
Chris@23
|
783 count++;
|
Chris@23
|
784 }
|
Chris@23
|
785 cerr << "done." << endl;
|
matthiasm@13
|
786
|
matthiasm@10
|
787
|
Chris@23
|
788 /* Simple chord estimation
|
Chris@23
|
789 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
790 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
791 */
|
Chris@23
|
792 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
793 count = 0;
|
Chris@23
|
794 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
795 vector<int> chordSequence;
|
Chris@23
|
796 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
797 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
798 scoreChordogram.push_back(temp);
|
Chris@23
|
799 }
|
Chris@23
|
800 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
801 int startIndex = count + 1;
|
Chris@23
|
802 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
803
|
Chris@23
|
804 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
805
|
Chris@23
|
806 vector<int> chordCandidates;
|
Chris@23
|
807 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
808 // float currsum = 0;
|
Chris@23
|
809 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
810 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
811 // }
|
Chris@23
|
812 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
813 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
814 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
815 chordCandidates.push_back(iChord);
|
Chris@23
|
816 break;
|
Chris@23
|
817 }
|
Chris@23
|
818 }
|
Chris@23
|
819 }
|
Chris@23
|
820 chordCandidates.push_back(nChord-1);
|
Chris@23
|
821 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
822
|
Chris@23
|
823 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
824 float maxindex = 0; //... and the index thereof
|
Chris@23
|
825 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
826 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
827
|
Chris@23
|
828 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
829 // now find the max values on both sides of iWF
|
Chris@23
|
830 // left side:
|
Chris@23
|
831 float maxL = 0;
|
Chris@23
|
832 unsigned maxindL = nChord-1;
|
Chris@23
|
833 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
834 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
835 float currsum = 0;
|
Chris@23
|
836 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
837 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
838 }
|
Chris@23
|
839 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
840 if (currsum > maxL) {
|
Chris@23
|
841 maxL = currsum;
|
Chris@23
|
842 maxindL = iChord;
|
Chris@23
|
843 }
|
Chris@23
|
844 }
|
Chris@23
|
845 // right side:
|
Chris@23
|
846 float maxR = 0;
|
Chris@23
|
847 unsigned maxindR = nChord-1;
|
Chris@23
|
848 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
849 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
850 float currsum = 0;
|
Chris@23
|
851 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
852 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
853 }
|
Chris@23
|
854 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
855 if (currsum > maxR) {
|
Chris@23
|
856 maxR = currsum;
|
Chris@23
|
857 maxindR = iChord;
|
Chris@23
|
858 }
|
Chris@23
|
859 }
|
Chris@23
|
860 if (maxL+maxR > maxval) {
|
Chris@23
|
861 maxval = maxL+maxR;
|
Chris@23
|
862 maxindex = iWF;
|
Chris@23
|
863 bestchordL = maxindL;
|
Chris@23
|
864 bestchordR = maxindR;
|
Chris@23
|
865 }
|
matthiasm@3
|
866
|
Chris@23
|
867 }
|
Chris@23
|
868 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
869 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
870 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
871 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
872 }
|
Chris@23
|
873 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
874 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
875 }
|
Chris@23
|
876 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
877 count++;
|
Chris@23
|
878 }
|
Chris@23
|
879 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
880 count = 0;
|
Chris@23
|
881 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
882 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
883 float maxindex = 0; //... and the index thereof
|
Chris@23
|
884 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
885 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
886 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
887 maxindex = iChord;
|
Chris@23
|
888 // cerr << iChord << endl;
|
Chris@23
|
889 }
|
Chris@23
|
890 }
|
Chris@23
|
891 chordSequence.push_back(maxindex);
|
Chris@23
|
892 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
893 count++;
|
Chris@23
|
894 }
|
Chris@23
|
895 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
896
|
matthiasm@3
|
897
|
Chris@23
|
898 // mode filter on chordSequence
|
Chris@23
|
899 count = 0;
|
Chris@23
|
900 string oldChord = "";
|
Chris@23
|
901 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
902 Feature f6 = *it;
|
Chris@23
|
903 Feature f7; // chord estimate
|
Chris@23
|
904 f7.hasTimestamp = true;
|
Chris@23
|
905 f7.timestamp = f6.timestamp;
|
Chris@23
|
906 Feature f8; // chord change value for this frame
|
Chris@23
|
907 f8.hasTimestamp = true;
|
Chris@23
|
908 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
909
|
Chris@23
|
910 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
911 int maxChordCount = 0;
|
Chris@23
|
912 int maxChordIndex = nChord-1;
|
Chris@23
|
913 string maxChord;
|
Chris@23
|
914 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
915 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
916 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
917 chordCount[chordSequence[i]]++;
|
Chris@23
|
918 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
919 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
920 maxChordCount++;
|
Chris@23
|
921 maxChordIndex = chordSequence[i];
|
Chris@23
|
922 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
923 }
|
Chris@23
|
924 }
|
Chris@23
|
925 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
926 // cerr << maxChordIndex << endl;
|
Chris@23
|
927 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
928 // cerr << chordchange[count] << endl;
|
Chris@23
|
929 fsOut[9].push_back(f8);
|
Chris@23
|
930 if (oldChord != maxChord) {
|
Chris@23
|
931 oldChord = maxChord;
|
matthiasm@3
|
932
|
Chris@23
|
933 // char buffer1 [50];
|
Chris@23
|
934 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
935 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
936 // } else {
|
Chris@23
|
937 // sprintf(buffer1, "N");
|
Chris@23
|
938 // }
|
Chris@23
|
939 // f7.label = buffer1;
|
Chris@23
|
940 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
941 fsOut[7].push_back(f7);
|
Chris@23
|
942 }
|
Chris@23
|
943 count++;
|
Chris@23
|
944 }
|
Chris@23
|
945 Feature f7; // last chord estimate
|
Chris@23
|
946 f7.hasTimestamp = true;
|
Chris@23
|
947 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
948 f7.label = "N";
|
Chris@23
|
949 fsOut[7].push_back(f7);
|
Chris@23
|
950 cerr << "done." << endl;
|
Chris@23
|
951 // // musicity
|
Chris@23
|
952 // count = 0;
|
Chris@23
|
953 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
954 // vector<float> musicityValue;
|
Chris@23
|
955 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
956 // Feature f4 = *it;
|
Chris@23
|
957 //
|
Chris@23
|
958 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
959 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
960 // float chromasum = 0;
|
Chris@23
|
961 // float diffsum = 0;
|
Chris@23
|
962 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
963 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
964 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
965 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
966 // }
|
Chris@23
|
967 // }
|
Chris@23
|
968 // diffsum /= chromasum;
|
Chris@23
|
969 // musicityValue.push_back(diffsum);
|
Chris@23
|
970 // count++;
|
Chris@23
|
971 // }
|
Chris@23
|
972 //
|
Chris@23
|
973 // float musicityThreshold = 0.44;
|
Chris@23
|
974 // if (m_stepSize == 4096) {
|
Chris@23
|
975 // musicityThreshold = 0.74;
|
Chris@23
|
976 // }
|
Chris@23
|
977 // if (m_stepSize == 4410) {
|
Chris@23
|
978 // musicityThreshold = 0.77;
|
Chris@23
|
979 // }
|
Chris@23
|
980 //
|
Chris@23
|
981 // count = 0;
|
Chris@23
|
982 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
983 // Feature f4 = *it;
|
Chris@23
|
984 // Feature f8; // musicity
|
Chris@23
|
985 // Feature f9; // musicity segmenter
|
Chris@23
|
986 //
|
Chris@23
|
987 // f8.hasTimestamp = true;
|
Chris@23
|
988 // f8.timestamp = f4.timestamp;
|
Chris@23
|
989 // f9.hasTimestamp = true;
|
Chris@23
|
990 // f9.timestamp = f4.timestamp;
|
Chris@23
|
991 //
|
Chris@23
|
992 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
993 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
994 // int musicityCount = 0;
|
Chris@23
|
995 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
996 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
997 // }
|
Chris@23
|
998 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
999 //
|
Chris@23
|
1000 // if (isSpeech) {
|
Chris@23
|
1001 // if (oldlabeltype != 2) {
|
Chris@23
|
1002 // f9.label = "Speech";
|
Chris@23
|
1003 // fsOut[9].push_back(f9);
|
Chris@23
|
1004 // oldlabeltype = 2;
|
Chris@23
|
1005 // }
|
Chris@23
|
1006 // } else {
|
Chris@23
|
1007 // if (oldlabeltype != 1) {
|
Chris@23
|
1008 // f9.label = "Music";
|
Chris@23
|
1009 // fsOut[9].push_back(f9);
|
Chris@23
|
1010 // oldlabeltype = 1;
|
Chris@23
|
1011 // }
|
Chris@23
|
1012 // }
|
Chris@23
|
1013 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1014 // fsOut[8].push_back(f8);
|
Chris@23
|
1015 // count++;
|
Chris@23
|
1016 // }
|
Chris@23
|
1017 return fsOut;
|
matthiasm@0
|
1018
|
matthiasm@0
|
1019 }
|
matthiasm@0
|
1020
|
Chris@35
|
1021 #endif
|