Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Compile-time switch for the "--> functionName" trace messages that the
// methods in this file write to cerr. Left off for normal builds.
const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
Chris@35
|
33 m_logSpectrum(0),
|
Chris@23
|
34 m_blockSize(0),
|
Chris@23
|
35 m_stepSize(0),
|
Chris@23
|
36 m_lengthOfNoteIndex(0),
|
Chris@23
|
37 m_meanTuning0(0),
|
Chris@23
|
38 m_meanTuning1(0),
|
Chris@23
|
39 m_meanTuning2(0),
|
Chris@23
|
40 m_localTuning0(0),
|
Chris@23
|
41 m_localTuning1(0),
|
Chris@23
|
42 m_localTuning2(0),
|
mail@41
|
43 m_whitening(1.0),
|
Chris@23
|
44 m_preset(0.0),
|
Chris@23
|
45 m_localTuning(0),
|
Chris@23
|
46 m_kernelValue(0),
|
Chris@23
|
47 m_kernelFftIndex(0),
|
Chris@23
|
48 m_kernelNoteIndex(0),
|
Chris@23
|
49 m_dict(0),
|
mail@60
|
50 m_tuneLocal(0),
|
Chris@23
|
51 m_chorddict(0),
|
Chris@23
|
52 m_chordnames(0),
|
Chris@23
|
53 m_doNormalizeChroma(0),
|
mail@60
|
54 m_rollon(0),
|
matthiasm@42
|
55 m_s(0.7),
|
matthiasm@50
|
56 m_useNNLS(1),
|
matthiasm@50
|
57 m_useHMM(1)
|
matthiasm@0
|
58 {
|
Chris@35
|
59 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
60
|
Chris@23
|
61 // make the *note* dictionary matrix
|
Chris@23
|
62 m_dict = new float[nNote * 84];
|
Chris@23
|
63 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
64 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
65
|
Chris@23
|
66 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
67 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70
|
Chris@35
|
71 NNLSBase::~NNLSBase()
|
matthiasm@0
|
72 {
|
Chris@35
|
73 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
74 delete [] m_dict;
|
matthiasm@0
|
75 }
|
matthiasm@0
|
76
|
matthiasm@0
|
77 string
|
Chris@35
|
78 NNLSBase::getMaker() const
|
matthiasm@0
|
79 {
|
Chris@23
|
80 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
81 // Your name here
|
matthiasm@0
|
82 return "Matthias Mauch";
|
matthiasm@0
|
83 }
|
matthiasm@0
|
84
|
matthiasm@0
|
85 int
|
Chris@35
|
86 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
87 {
|
Chris@23
|
88 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
89 // Increment this each time you release a version that behaves
|
matthiasm@0
|
90 // differently from the previous one
|
matthiasm@0
|
91 return 1;
|
matthiasm@0
|
92 }
|
matthiasm@0
|
93
|
matthiasm@0
|
94 string
|
Chris@35
|
95 NNLSBase::getCopyright() const
|
matthiasm@0
|
96 {
|
Chris@23
|
97 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
98 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
99 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
100 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
101 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
102 return "GPL";
|
matthiasm@0
|
103 }
|
matthiasm@0
|
104
|
Chris@35
|
105 NNLSBase::InputDomain
|
Chris@35
|
106 NNLSBase::getInputDomain() const
|
matthiasm@0
|
107 {
|
Chris@23
|
108 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
109 return FrequencyDomain;
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 size_t
|
Chris@35
|
113 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
114 {
|
Chris@23
|
115 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
116 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
117 }
|
matthiasm@0
|
118
|
matthiasm@0
|
119 size_t
|
Chris@35
|
120 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
121 {
|
Chris@23
|
122 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
123 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
124 // means the same as the block size for TimeDomain
|
Chris@23
|
125 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
126 }
|
matthiasm@0
|
127
|
matthiasm@0
|
128 size_t
|
Chris@35
|
129 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
130 {
|
Chris@23
|
131 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
132 return 1;
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134
|
matthiasm@0
|
135 size_t
|
Chris@35
|
136 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
137 {
|
Chris@23
|
138 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
139 return 1;
|
matthiasm@0
|
140 }
|
matthiasm@0
|
141
|
Chris@35
|
142 NNLSBase::ParameterList
|
Chris@35
|
143 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
144 {
|
Chris@23
|
145 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
146 ParameterList list;
|
matthiasm@0
|
147
|
matthiasm@42
|
148 ParameterDescriptor d;
|
matthiasm@42
|
149 d.identifier = "useNNLS";
|
matthiasm@42
|
150 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
151 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
152 d.unit = "";
|
matthiasm@42
|
153 d.minValue = 0.0;
|
matthiasm@42
|
154 d.maxValue = 1.0;
|
matthiasm@42
|
155 d.defaultValue = 1.0;
|
matthiasm@42
|
156 d.isQuantized = true;
|
matthiasm@42
|
157 d.quantizeStep = 1.0;
|
matthiasm@42
|
158 list.push_back(d);
|
matthiasm@42
|
159
|
mail@41
|
160 ParameterDescriptor d0;
|
mail@41
|
161 d0.identifier = "rollon";
|
mail@41
|
162 d0.name = "spectral roll-on";
|
matthiasm@58
|
163 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
164 d0.unit = "%";
|
mail@41
|
165 d0.minValue = 0;
|
matthiasm@59
|
166 d0.maxValue = 5;
|
mail@41
|
167 d0.defaultValue = 0;
|
matthiasm@48
|
168 d0.isQuantized = true;
|
matthiasm@59
|
169 d0.quantizeStep = 0.5;
|
mail@41
|
170 list.push_back(d0);
|
matthiasm@4
|
171
|
matthiasm@4
|
172 ParameterDescriptor d1;
|
matthiasm@4
|
173 d1.identifier = "tuningmode";
|
matthiasm@4
|
174 d1.name = "tuning mode";
|
matthiasm@4
|
175 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
176 d1.unit = "";
|
matthiasm@4
|
177 d1.minValue = 0;
|
matthiasm@4
|
178 d1.maxValue = 1;
|
matthiasm@4
|
179 d1.defaultValue = 0;
|
matthiasm@4
|
180 d1.isQuantized = true;
|
matthiasm@4
|
181 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
182 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
183 d1.quantizeStep = 1.0;
|
matthiasm@4
|
184 list.push_back(d1);
|
matthiasm@4
|
185
|
mail@41
|
186 ParameterDescriptor d2;
|
mail@41
|
187 d2.identifier = "whitening";
|
mail@41
|
188 d2.name = "spectral whitening";
|
mail@41
|
189 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
190 d2.unit = "";
|
mail@41
|
191 d2.isQuantized = true;
|
mail@41
|
192 d2.minValue = 0.0;
|
mail@41
|
193 d2.maxValue = 1.0;
|
mail@41
|
194 d2.defaultValue = 1.0;
|
mail@41
|
195 d2.isQuantized = false;
|
mail@41
|
196 list.push_back(d2);
|
mail@41
|
197
|
mail@41
|
198 ParameterDescriptor d3;
|
mail@41
|
199 d3.identifier = "s";
|
mail@41
|
200 d3.name = "spectral shape";
|
mail@41
|
201 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
202 d3.unit = "";
|
mail@41
|
203 d3.minValue = 0.5;
|
mail@41
|
204 d3.maxValue = 0.9;
|
mail@41
|
205 d3.defaultValue = 0.7;
|
mail@41
|
206 d3.isQuantized = false;
|
mail@41
|
207 list.push_back(d3);
|
mail@41
|
208
|
Chris@23
|
209 ParameterDescriptor d4;
|
matthiasm@12
|
210 d4.identifier = "chromanormalize";
|
matthiasm@12
|
211 d4.name = "chroma normalization";
|
matthiasm@12
|
212 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
213 d4.unit = "";
|
matthiasm@12
|
214 d4.minValue = 0;
|
matthiasm@13
|
215 d4.maxValue = 3;
|
matthiasm@12
|
216 d4.defaultValue = 0;
|
matthiasm@12
|
217 d4.isQuantized = true;
|
matthiasm@13
|
218 d4.valueNames.push_back("none");
|
matthiasm@13
|
219 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
220 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
221 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
222 d4.quantizeStep = 1.0;
|
matthiasm@12
|
223 list.push_back(d4);
|
matthiasm@4
|
224
|
matthiasm@0
|
225 return list;
|
matthiasm@0
|
226 }
|
matthiasm@0
|
227
|
matthiasm@0
|
228 float
|
Chris@35
|
229 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
230 {
|
Chris@23
|
231 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
232 if (identifier == "useNNLS") {
|
matthiasm@42
|
233 return m_useNNLS;
|
matthiasm@0
|
234 }
|
matthiasm@0
|
235
|
mail@41
|
236 if (identifier == "whitening") {
|
mail@41
|
237 return m_whitening;
|
mail@41
|
238 }
|
mail@41
|
239
|
mail@41
|
240 if (identifier == "s") {
|
mail@41
|
241 return m_s;
|
matthiasm@0
|
242 }
|
matthiasm@17
|
243
|
Chris@23
|
244 if (identifier == "rollon") {
|
matthiasm@17
|
245 return m_rollon;
|
matthiasm@17
|
246 }
|
matthiasm@0
|
247
|
matthiasm@0
|
248 if (identifier == "tuningmode") {
|
matthiasm@0
|
249 if (m_tuneLocal) {
|
matthiasm@0
|
250 return 1.0;
|
matthiasm@0
|
251 } else {
|
matthiasm@0
|
252 return 0.0;
|
matthiasm@0
|
253 }
|
matthiasm@0
|
254 }
|
Chris@23
|
255 if (identifier == "preset") {
|
Chris@23
|
256 return m_preset;
|
matthiasm@3
|
257 }
|
Chris@23
|
258 if (identifier == "chromanormalize") {
|
Chris@23
|
259 return m_doNormalizeChroma;
|
matthiasm@12
|
260 }
|
matthiasm@50
|
261
|
matthiasm@50
|
262 if (identifier == "useHMM") {
|
matthiasm@50
|
263 return m_useHMM;
|
matthiasm@50
|
264 }
|
matthiasm@50
|
265
|
matthiasm@0
|
266 return 0;
|
matthiasm@0
|
267
|
matthiasm@0
|
268 }
|
matthiasm@0
|
269
|
matthiasm@0
|
270 void
|
Chris@35
|
271 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
272 {
|
Chris@23
|
273 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
274 if (identifier == "useNNLS") {
|
matthiasm@42
|
275 m_useNNLS = (int) value;
|
matthiasm@0
|
276 }
|
matthiasm@0
|
277
|
mail@41
|
278 if (identifier == "whitening") {
|
mail@41
|
279 m_whitening = value;
|
matthiasm@0
|
280 }
|
matthiasm@0
|
281
|
mail@41
|
282 if (identifier == "s") {
|
mail@41
|
283 m_s = value;
|
mail@41
|
284 }
|
mail@41
|
285
|
matthiasm@50
|
286 if (identifier == "useHMM") {
|
matthiasm@50
|
287 m_useHMM = value;
|
matthiasm@50
|
288 }
|
matthiasm@50
|
289
|
matthiasm@0
|
290 if (identifier == "tuningmode") {
|
mail@60
|
291 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
292 m_tuneLocal = value;
|
matthiasm@0
|
293 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
294 }
|
matthiasm@42
|
295 // if (identifier == "preset") {
|
matthiasm@42
|
296 // m_preset = value;
|
matthiasm@42
|
297 // if (m_preset == 0.0) {
|
matthiasm@42
|
298 // m_tuneLocal = false;
|
matthiasm@42
|
299 // m_whitening = 1.0;
|
matthiasm@42
|
300 // m_dictID = 0.0;
|
matthiasm@42
|
301 // }
|
matthiasm@42
|
302 // if (m_preset == 1.0) {
|
matthiasm@42
|
303 // m_tuneLocal = false;
|
matthiasm@42
|
304 // m_whitening = 1.0;
|
matthiasm@42
|
305 // m_dictID = 1.0;
|
matthiasm@42
|
306 // }
|
matthiasm@42
|
307 // if (m_preset == 2.0) {
|
matthiasm@42
|
308 // m_tuneLocal = false;
|
matthiasm@42
|
309 // m_whitening = 0.7;
|
matthiasm@42
|
310 // m_dictID = 0.0;
|
matthiasm@42
|
311 // }
|
matthiasm@42
|
312 // }
|
Chris@23
|
313 if (identifier == "chromanormalize") {
|
Chris@23
|
314 m_doNormalizeChroma = value;
|
Chris@23
|
315 }
|
matthiasm@17
|
316
|
Chris@23
|
317 if (identifier == "rollon") {
|
Chris@23
|
318 m_rollon = value;
|
Chris@23
|
319 }
|
matthiasm@0
|
320 }
|
matthiasm@0
|
321
|
Chris@35
|
322 NNLSBase::ProgramList
|
Chris@35
|
323 NNLSBase::getPrograms() const
|
matthiasm@0
|
324 {
|
Chris@23
|
325 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
326 ProgramList list;
|
matthiasm@0
|
327
|
matthiasm@0
|
328 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
329 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
330
|
matthiasm@0
|
331 return list;
|
matthiasm@0
|
332 }
|
matthiasm@0
|
333
|
matthiasm@0
|
334 string
|
Chris@35
|
335 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
336 {
|
Chris@23
|
337 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
338 return ""; // no programs
|
matthiasm@0
|
339 }
|
matthiasm@0
|
340
|
matthiasm@0
|
341 void
|
Chris@35
|
342 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
343 {
|
Chris@23
|
344 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
345 }
|
matthiasm@0
|
346
|
matthiasm@0
|
347
|
matthiasm@0
|
348 bool
|
Chris@35
|
349 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
350 {
|
Chris@23
|
351 if (debug_on) {
|
Chris@23
|
352 cerr << "--> initialise";
|
Chris@23
|
353 }
|
matthiasm@1
|
354
|
mail@76
|
355 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
356 float hamwinsum = 0;
|
mail@76
|
357 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
358 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
359 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
360 }
|
mail@76
|
361 for (int i = 0; i < hamwinlength; ++i) hw[i] = round(hw[i] / hamwinsum * 10000)*1.0/10000;
|
mail@76
|
362
|
matthiasm@0
|
363 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
364 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
365 m_blockSize = blockSize;
|
matthiasm@0
|
366 m_stepSize = stepSize;
|
Chris@35
|
367 m_frameCount = 0;
|
Chris@23
|
368 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
369 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
370 float *tempkernel;
|
matthiasm@1
|
371
|
Chris@23
|
372 tempkernel = new float[tempn];
|
matthiasm@1
|
373
|
Chris@23
|
374 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
375 m_kernelValue.clear();
|
Chris@23
|
376 m_kernelFftIndex.clear();
|
Chris@23
|
377 m_kernelNoteIndex.clear();
|
Chris@23
|
378 int countNonzero = 0;
|
Chris@23
|
379 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
380 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
381 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
382 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
383 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
384 countNonzero++;
|
Chris@23
|
385 }
|
Chris@23
|
386 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
387 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
388 }
|
Chris@23
|
389 }
|
Chris@23
|
390 }
|
Chris@23
|
391 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
392 delete [] tempkernel;
|
Chris@35
|
393 /*
|
Chris@23
|
394 ofstream myfile;
|
Chris@23
|
395 myfile.open ("matrix.txt");
|
matthiasm@3
|
396 // myfile << "Writing this to a file.\n";
|
Chris@23
|
397 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
398 myfile << m_dict[i] << endl;
|
Chris@23
|
399 }
|
matthiasm@3
|
400 myfile.close();
|
Chris@35
|
401 */
|
matthiasm@0
|
402 return true;
|
matthiasm@0
|
403 }
|
matthiasm@0
|
404
|
matthiasm@0
|
405 void
|
Chris@35
|
406 NNLSBase::reset()
|
matthiasm@0
|
407 {
|
Chris@23
|
408 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
409
|
matthiasm@0
|
410 // Clear buffers, reset stored values, etc
|
Chris@35
|
411 m_frameCount = 0;
|
matthiasm@42
|
412 // m_dictID = 0;
|
Chris@35
|
413 m_logSpectrum.clear();
|
Chris@23
|
414 m_meanTuning0 = 0;
|
Chris@23
|
415 m_meanTuning1 = 0;
|
Chris@23
|
416 m_meanTuning2 = 0;
|
Chris@23
|
417 m_localTuning0 = 0;
|
Chris@23
|
418 m_localTuning1 = 0;
|
Chris@23
|
419 m_localTuning2 = 0;
|
Chris@23
|
420 m_localTuning.clear();
|
matthiasm@0
|
421 }
|
matthiasm@0
|
422
|
Chris@35
|
423 void
|
Chris@35
|
424 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
425 {
|
Chris@35
|
426 m_frameCount++;
|
Chris@23
|
427 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
428
|
Chris@23
|
429 const float *fbuf = inputBuffers[0];
|
Chris@23
|
430 float energysum = 0;
|
Chris@23
|
431 // make magnitude
|
Chris@23
|
432 float maxmag = -10000;
|
Chris@23
|
433 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
434 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
435 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
436 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
437 if (m_rollon > 0) {
|
Chris@23
|
438 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
439 }
|
Chris@23
|
440 }
|
matthiasm@14
|
441
|
Chris@23
|
442 float cumenergy = 0;
|
Chris@23
|
443 if (m_rollon > 0) {
|
Chris@23
|
444 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
445 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
446 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
447 else break;
|
Chris@23
|
448 }
|
Chris@23
|
449 }
|
matthiasm@17
|
450
|
Chris@23
|
451 if (maxmag < 2) {
|
Chris@23
|
452 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
453 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
454 magnitude[iBin] = 0;
|
Chris@23
|
455 }
|
Chris@23
|
456 }
|
matthiasm@4
|
457
|
Chris@23
|
458 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
459 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
460 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
461 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
462 }
|
Chris@23
|
463 int binCount = 0;
|
Chris@23
|
464 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
465 // cerr << ".";
|
Chris@23
|
466 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
467 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
468 binCount++;
|
Chris@23
|
469 }
|
Chris@23
|
470 // cerr << nm[20];
|
Chris@23
|
471 // cerr << endl;
|
matthiasm@0
|
472
|
matthiasm@0
|
473
|
Chris@35
|
474 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
475 // update means of complex tuning variables
|
Chris@35
|
476 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
477 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
478 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
479
|
matthiasm@0
|
480 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
481 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
482 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
483 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
484 float ratioOld = 0.997;
|
matthiasm@3
|
485 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
486 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
487 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
488 }
|
matthiasm@0
|
489
|
matthiasm@0
|
490 // if (m_tuneLocal) {
|
Chris@23
|
491 // local tuning
|
Chris@23
|
492 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
493 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
494 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
495 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
496
|
Chris@23
|
497 Feature f1; // logfreqspec
|
Chris@23
|
498 f1.hasTimestamp = true;
|
matthiasm@0
|
499 f1.timestamp = timestamp;
|
Chris@23
|
500 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
501 f1.values.push_back(nm[iNote]);
|
Chris@23
|
502 }
|
matthiasm@0
|
503
|
matthiasm@0
|
504 // deletes
|
matthiasm@0
|
505 delete[] magnitude;
|
matthiasm@0
|
506 delete[] nm;
|
matthiasm@0
|
507
|
Chris@35
|
508 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
509 }
|
matthiasm@0
|
510
|
Chris@35
|
511
|
Chris@35
|
512 #ifdef NOT_DEFINED
|
Chris@35
|
513
|
Chris@35
|
514 NNLSBase::FeatureSet
|
Chris@35
|
515 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
516 {
|
Chris@23
|
517 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
518 FeatureSet fsOut;
|
Chris@35
|
519 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
520 int nChord = m_chordnames.size();
|
Chris@23
|
521 //
|
Chris@23
|
522 /** Calculate Tuning
|
Chris@23
|
523 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
524 cumulative mean real and imag values)
|
Chris@23
|
525 **/
|
Chris@23
|
526 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
527 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
528 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
529 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
530 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
531 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
532
|
Chris@23
|
533 char buffer0 [50];
|
matthiasm@1
|
534
|
Chris@23
|
535 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
536
|
Chris@23
|
537 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
538
|
Chris@23
|
539 // push tuning to FeatureSet fsOut
|
Chris@23
|
540 Feature f0; // tuning
|
Chris@23
|
541 f0.hasTimestamp = true;
|
Chris@23
|
542 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
543 f0.label = buffer0;
|
Chris@23
|
544 fsOut[0].push_back(f0);
|
matthiasm@1
|
545
|
Chris@23
|
546 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
547 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
548 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
549 **/
|
Chris@23
|
550 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
551
|
Chris@23
|
552 float tempValue = 0;
|
Chris@23
|
553 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
554 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
555 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
556 int count = 0;
|
matthiasm@1
|
557
|
Chris@35
|
558 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
559 Feature f1 = *i;
|
Chris@23
|
560 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
561 f2.hasTimestamp = true;
|
Chris@23
|
562 f2.timestamp = f1.timestamp;
|
Chris@23
|
563 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
564
|
mail@60
|
565 if (m_tuneLocal == 1.0) {
|
Chris@23
|
566 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
567 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
568 }
|
matthiasm@1
|
569
|
Chris@23
|
570 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
571
|
Chris@23
|
572 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
573 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
574 f2.values.push_back(tempValue);
|
Chris@23
|
575 }
|
matthiasm@1
|
576
|
Chris@23
|
577 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
578 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
579 vector<float> runningstd;
|
Chris@23
|
580 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
581 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
582 }
|
Chris@23
|
583 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
584 for (int i = 0; i < 256; i++) {
|
Chris@23
|
585 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
586 if (runningstd[i] > 0) {
|
Chris@23
|
587 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
588 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
589 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
590 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
591 }
|
Chris@23
|
592 if (f2.values[i] < 0) {
|
Chris@23
|
593 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
594 }
|
Chris@23
|
595 }
|
Chris@23
|
596 fsOut[2].push_back(f2);
|
Chris@23
|
597 count++;
|
Chris@23
|
598 }
|
Chris@23
|
599 cerr << "done." << endl;
|
matthiasm@1
|
600
|
Chris@23
|
601 /** Semitone spectrum and chromagrams
|
Chris@23
|
602 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
603 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
604 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
605 bass and treble stacked onto each other).
|
Chris@23
|
606 **/
|
matthiasm@42
|
607 if (m_useNNLS == 0) {
|
Chris@23
|
608 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
609 } else {
|
Chris@23
|
610 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
611 }
|
matthiasm@13
|
612
|
matthiasm@1
|
613
|
Chris@23
|
614 vector<vector<float> > chordogram;
|
Chris@23
|
615 vector<vector<int> > scoreChordogram;
|
Chris@23
|
616 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
617 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
618 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
619 count = 0;
|
matthiasm@9
|
620
|
Chris@23
|
621 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
622 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
623 Feature f3; // semitone spectrum
|
Chris@23
|
624 Feature f4; // treble chromagram
|
Chris@23
|
625 Feature f5; // bass chromagram
|
Chris@23
|
626 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
627
|
Chris@23
|
628 f3.hasTimestamp = true;
|
Chris@23
|
629 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
630
|
Chris@23
|
631 f4.hasTimestamp = true;
|
Chris@23
|
632 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
633
|
Chris@23
|
634 f5.hasTimestamp = true;
|
Chris@23
|
635 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
636
|
Chris@23
|
637 f6.hasTimestamp = true;
|
Chris@23
|
638 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
639
|
Chris@29
|
640 float b[256];
|
matthiasm@1
|
641
|
Chris@23
|
642 bool some_b_greater_zero = false;
|
Chris@23
|
643 float sumb = 0;
|
Chris@23
|
644 for (int i = 0; i < 256; i++) {
|
Chris@23
|
645 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
646 b[i] = f2.values[i];
|
Chris@23
|
647 sumb += b[i];
|
Chris@23
|
648 if (b[i] > 0) {
|
Chris@23
|
649 some_b_greater_zero = true;
|
Chris@23
|
650 }
|
Chris@23
|
651 }
|
matthiasm@1
|
652
|
Chris@23
|
653 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
654
|
Chris@23
|
655 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
656 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
657 float currval;
|
Chris@23
|
658 unsigned iSemitone = 0;
|
matthiasm@1
|
659
|
Chris@23
|
660 if (some_b_greater_zero) {
|
matthiasm@42
|
661 if (m_useNNLS == 0) {
|
Chris@23
|
662 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
663 currval = 0;
|
Chris@23
|
664 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
665 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
666 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
667 f3.values.push_back(currval);
|
Chris@23
|
668 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
669 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
670 iSemitone++;
|
Chris@23
|
671 }
|
matthiasm@1
|
672
|
Chris@23
|
673 } else {
|
Chris@29
|
674 float x[84+1000];
|
Chris@23
|
675 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
676 vector<int> signifIndex;
|
Chris@23
|
677 int index=0;
|
Chris@23
|
678 sumb /= 84.0;
|
Chris@23
|
679 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
680 float currval = 0;
|
Chris@23
|
681 currval += b[iNote + 1 + -1];
|
Chris@23
|
682 currval += b[iNote + 1 + 0];
|
Chris@23
|
683 currval += b[iNote + 1 + 1];
|
Chris@23
|
684 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
685 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
686 index++;
|
Chris@23
|
687 }
|
Chris@29
|
688 float rnorm;
|
Chris@29
|
689 float w[84+1000];
|
Chris@29
|
690 float zz[84+1000];
|
Chris@23
|
691 int indx[84+1000];
|
Chris@23
|
692 int mode;
|
Chris@23
|
693 int dictsize = 256*signifIndex.size();
|
Chris@23
|
694 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
695 float *curr_dict = new float[dictsize];
|
Chris@23
|
696 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
697 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
698 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
699 }
|
Chris@23
|
700 }
|
Chris@29
|
701 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
702 delete [] curr_dict;
|
Chris@23
|
703 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
704 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
705 // cerr << mode << endl;
|
Chris@23
|
706 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
707 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
708 }
|
Chris@23
|
709 }
|
Chris@23
|
710 }
|
matthiasm@13
|
711
|
matthiasm@10
|
712
|
matthiasm@12
|
713
|
matthiasm@13
|
714
|
Chris@23
|
715 f4.values = chroma;
|
Chris@23
|
716 f5.values = basschroma;
|
Chris@23
|
717 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
718 f6.values = chroma;
|
matthiasm@1
|
719
|
Chris@23
|
720 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
721 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
722 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
723 case 0: // should never end up here
|
Chris@23
|
724 break;
|
Chris@23
|
725 case 1:
|
Chris@23
|
726 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
727 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
728 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
729 break;
|
Chris@23
|
730 case 2:
|
Chris@23
|
731 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
732 chromanorm[0] += *it;
|
Chris@23
|
733 }
|
Chris@23
|
734 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
735 chromanorm[1] += *it;
|
Chris@23
|
736 }
|
Chris@23
|
737 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
738 chromanorm[2] += *it;
|
Chris@23
|
739 }
|
Chris@23
|
740 break;
|
Chris@23
|
741 case 3:
|
Chris@23
|
742 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
743 chromanorm[0] += pow(*it,2);
|
Chris@23
|
744 }
|
Chris@23
|
745 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
746 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
747 chromanorm[1] += pow(*it,2);
|
Chris@23
|
748 }
|
Chris@23
|
749 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
750 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
751 chromanorm[2] += pow(*it,2);
|
Chris@23
|
752 }
|
Chris@23
|
753 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
754 break;
|
Chris@23
|
755 }
|
Chris@23
|
756 if (chromanorm[0] > 0) {
|
Chris@23
|
757 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
758 f4.values[i] /= chromanorm[0];
|
Chris@23
|
759 }
|
Chris@23
|
760 }
|
Chris@23
|
761 if (chromanorm[1] > 0) {
|
Chris@23
|
762 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
763 f5.values[i] /= chromanorm[1];
|
Chris@23
|
764 }
|
Chris@23
|
765 }
|
Chris@23
|
766 if (chromanorm[2] > 0) {
|
Chris@23
|
767 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
768 f6.values[i] /= chromanorm[2];
|
Chris@23
|
769 }
|
Chris@23
|
770 }
|
matthiasm@13
|
771
|
Chris@23
|
772 }
|
matthiasm@13
|
773
|
Chris@23
|
774 // local chord estimation
|
Chris@23
|
775 vector<float> currentChordSalience;
|
Chris@23
|
776 float tempchordvalue = 0;
|
Chris@23
|
777 float sumchordvalue = 0;
|
matthiasm@9
|
778
|
Chris@23
|
779 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
780 tempchordvalue = 0;
|
Chris@23
|
781 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
782 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
783 }
|
Chris@23
|
784 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
785 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
786 }
|
Chris@23
|
787 sumchordvalue+=tempchordvalue;
|
Chris@23
|
788 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
789 }
|
Chris@23
|
790 if (sumchordvalue > 0) {
|
Chris@23
|
791 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
792 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
793 }
|
Chris@23
|
794 } else {
|
Chris@23
|
795 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
796 }
|
Chris@23
|
797 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
798
|
Chris@23
|
799 fsOut[3].push_back(f3);
|
Chris@23
|
800 fsOut[4].push_back(f4);
|
Chris@23
|
801 fsOut[5].push_back(f5);
|
Chris@23
|
802 fsOut[6].push_back(f6);
|
Chris@23
|
803 count++;
|
Chris@23
|
804 }
|
Chris@23
|
805 cerr << "done." << endl;
|
matthiasm@13
|
806
|
matthiasm@10
|
807
|
Chris@23
|
808 /* Simple chord estimation
|
Chris@23
|
809 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
810 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
811 */
|
Chris@23
|
812 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
813 count = 0;
|
Chris@23
|
814 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
815 vector<int> chordSequence;
|
Chris@23
|
816 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
817 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
818 scoreChordogram.push_back(temp);
|
Chris@23
|
819 }
|
Chris@23
|
820 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
821 int startIndex = count + 1;
|
Chris@23
|
822 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
823
|
Chris@23
|
824 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
825
|
Chris@23
|
826 vector<int> chordCandidates;
|
Chris@23
|
827 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
828 // float currsum = 0;
|
Chris@23
|
829 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
830 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
831 // }
|
Chris@23
|
832 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
833 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
834 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
835 chordCandidates.push_back(iChord);
|
Chris@23
|
836 break;
|
Chris@23
|
837 }
|
Chris@23
|
838 }
|
Chris@23
|
839 }
|
Chris@23
|
840 chordCandidates.push_back(nChord-1);
|
Chris@23
|
841 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
842
|
Chris@23
|
843 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
844 float maxindex = 0; //... and the index thereof
|
Chris@23
|
845 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
846 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
847
|
Chris@23
|
848 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
849 // now find the max values on both sides of iWF
|
Chris@23
|
850 // left side:
|
Chris@23
|
851 float maxL = 0;
|
Chris@23
|
852 unsigned maxindL = nChord-1;
|
Chris@23
|
853 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
854 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
855 float currsum = 0;
|
Chris@23
|
856 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
857 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
858 }
|
Chris@23
|
859 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
860 if (currsum > maxL) {
|
Chris@23
|
861 maxL = currsum;
|
Chris@23
|
862 maxindL = iChord;
|
Chris@23
|
863 }
|
Chris@23
|
864 }
|
Chris@23
|
865 // right side:
|
Chris@23
|
866 float maxR = 0;
|
Chris@23
|
867 unsigned maxindR = nChord-1;
|
Chris@23
|
868 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
869 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
870 float currsum = 0;
|
Chris@23
|
871 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
872 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
873 }
|
Chris@23
|
874 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
875 if (currsum > maxR) {
|
Chris@23
|
876 maxR = currsum;
|
Chris@23
|
877 maxindR = iChord;
|
Chris@23
|
878 }
|
Chris@23
|
879 }
|
Chris@23
|
880 if (maxL+maxR > maxval) {
|
Chris@23
|
881 maxval = maxL+maxR;
|
Chris@23
|
882 maxindex = iWF;
|
Chris@23
|
883 bestchordL = maxindL;
|
Chris@23
|
884 bestchordR = maxindR;
|
Chris@23
|
885 }
|
matthiasm@3
|
886
|
Chris@23
|
887 }
|
Chris@23
|
888 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
889 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
890 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
891 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
892 }
|
Chris@23
|
893 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
894 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
895 }
|
Chris@23
|
896 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
897 count++;
|
Chris@23
|
898 }
|
Chris@23
|
899 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
900 count = 0;
|
Chris@23
|
901 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
902 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
903 float maxindex = 0; //... and the index thereof
|
Chris@23
|
904 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
905 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
906 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
907 maxindex = iChord;
|
Chris@23
|
908 // cerr << iChord << endl;
|
Chris@23
|
909 }
|
Chris@23
|
910 }
|
Chris@23
|
911 chordSequence.push_back(maxindex);
|
Chris@23
|
912 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
913 count++;
|
Chris@23
|
914 }
|
Chris@23
|
915 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
916
|
matthiasm@3
|
917
|
Chris@23
|
918 // mode filter on chordSequence
|
Chris@23
|
919 count = 0;
|
Chris@23
|
920 string oldChord = "";
|
Chris@23
|
921 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
922 Feature f6 = *it;
|
Chris@23
|
923 Feature f7; // chord estimate
|
Chris@23
|
924 f7.hasTimestamp = true;
|
Chris@23
|
925 f7.timestamp = f6.timestamp;
|
Chris@23
|
926 Feature f8; // chord estimate
|
Chris@23
|
927 f8.hasTimestamp = true;
|
Chris@23
|
928 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
929
|
Chris@23
|
930 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
931 int maxChordCount = 0;
|
Chris@23
|
932 int maxChordIndex = nChord-1;
|
Chris@23
|
933 string maxChord;
|
Chris@23
|
934 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
935 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
936 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
937 chordCount[chordSequence[i]]++;
|
Chris@23
|
938 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
939 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
940 maxChordCount++;
|
Chris@23
|
941 maxChordIndex = chordSequence[i];
|
Chris@23
|
942 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
943 }
|
Chris@23
|
944 }
|
Chris@23
|
945 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
946 // cerr << maxChordIndex << endl;
|
Chris@23
|
947 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
948 // cerr << chordchange[count] << endl;
|
Chris@23
|
949 fsOut[9].push_back(f8);
|
Chris@23
|
950 if (oldChord != maxChord) {
|
Chris@23
|
951 oldChord = maxChord;
|
matthiasm@3
|
952
|
Chris@23
|
953 // char buffer1 [50];
|
Chris@23
|
954 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
955 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
956 // } else {
|
Chris@23
|
957 // sprintf(buffer1, "N");
|
Chris@23
|
958 // }
|
Chris@23
|
959 // f7.label = buffer1;
|
Chris@23
|
960 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
961 fsOut[7].push_back(f7);
|
Chris@23
|
962 }
|
Chris@23
|
963 count++;
|
Chris@23
|
964 }
|
Chris@23
|
965 Feature f7; // last chord estimate
|
Chris@23
|
966 f7.hasTimestamp = true;
|
Chris@23
|
967 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
968 f7.label = "N";
|
Chris@23
|
969 fsOut[7].push_back(f7);
|
Chris@23
|
970 cerr << "done." << endl;
|
Chris@23
|
971 // // musicity
|
Chris@23
|
972 // count = 0;
|
Chris@23
|
973 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
974 // vector<float> musicityValue;
|
Chris@23
|
975 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
976 // Feature f4 = *it;
|
Chris@23
|
977 //
|
Chris@23
|
978 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
979 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
980 // float chromasum = 0;
|
Chris@23
|
981 // float diffsum = 0;
|
Chris@23
|
982 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
983 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
984 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
985 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
986 // }
|
Chris@23
|
987 // }
|
Chris@23
|
988 // diffsum /= chromasum;
|
Chris@23
|
989 // musicityValue.push_back(diffsum);
|
Chris@23
|
990 // count++;
|
Chris@23
|
991 // }
|
Chris@23
|
992 //
|
Chris@23
|
993 // float musicityThreshold = 0.44;
|
Chris@23
|
994 // if (m_stepSize == 4096) {
|
Chris@23
|
995 // musicityThreshold = 0.74;
|
Chris@23
|
996 // }
|
Chris@23
|
997 // if (m_stepSize == 4410) {
|
Chris@23
|
998 // musicityThreshold = 0.77;
|
Chris@23
|
999 // }
|
Chris@23
|
1000 //
|
Chris@23
|
1001 // count = 0;
|
Chris@23
|
1002 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1003 // Feature f4 = *it;
|
Chris@23
|
1004 // Feature f8; // musicity
|
Chris@23
|
1005 // Feature f9; // musicity segmenter
|
Chris@23
|
1006 //
|
Chris@23
|
1007 // f8.hasTimestamp = true;
|
Chris@23
|
1008 // f8.timestamp = f4.timestamp;
|
Chris@23
|
1009 // f9.hasTimestamp = true;
|
Chris@23
|
1010 // f9.timestamp = f4.timestamp;
|
Chris@23
|
1011 //
|
Chris@23
|
1012 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1013 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1014 // int musicityCount = 0;
|
Chris@23
|
1015 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
1016 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1017 // }
|
Chris@23
|
1018 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1019 //
|
Chris@23
|
1020 // if (isSpeech) {
|
Chris@23
|
1021 // if (oldlabeltype != 2) {
|
Chris@23
|
1022 // f9.label = "Speech";
|
Chris@23
|
1023 // fsOut[9].push_back(f9);
|
Chris@23
|
1024 // oldlabeltype = 2;
|
Chris@23
|
1025 // }
|
Chris@23
|
1026 // } else {
|
Chris@23
|
1027 // if (oldlabeltype != 1) {
|
Chris@23
|
1028 // f9.label = "Music";
|
Chris@23
|
1029 // fsOut[9].push_back(f9);
|
Chris@23
|
1030 // oldlabeltype = 1;
|
Chris@23
|
1031 // }
|
Chris@23
|
1032 // }
|
Chris@23
|
1033 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1034 // fsOut[8].push_back(f8);
|
Chris@23
|
1035 // count++;
|
Chris@23
|
1036 // }
|
Chris@23
|
1037 return fsOut;
|
matthiasm@0
|
1038
|
matthiasm@0
|
1039 }
|
matthiasm@0
|
1040
|
Chris@35
|
1041 #endif
|