Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Compile-time switch for the "--> <function>" trace lines that the methods
// in this file write to cerr. Off by default.
static const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
Chris@35
|
33 m_logSpectrum(0),
|
Chris@23
|
34 m_blockSize(0),
|
Chris@23
|
35 m_stepSize(0),
|
Chris@23
|
36 m_lengthOfNoteIndex(0),
|
mail@80
|
37 m_meanTunings(0),
|
mail@80
|
38 m_localTunings(0),
|
mail@41
|
39 m_whitening(1.0),
|
Chris@23
|
40 m_preset(0.0),
|
Chris@23
|
41 m_localTuning(0),
|
Chris@23
|
42 m_kernelValue(0),
|
Chris@23
|
43 m_kernelFftIndex(0),
|
Chris@23
|
44 m_kernelNoteIndex(0),
|
Chris@23
|
45 m_dict(0),
|
mail@60
|
46 m_tuneLocal(0),
|
Chris@23
|
47 m_chorddict(0),
|
Chris@23
|
48 m_chordnames(0),
|
Chris@23
|
49 m_doNormalizeChroma(0),
|
mail@60
|
50 m_rollon(0),
|
matthiasm@42
|
51 m_s(0.7),
|
matthiasm@50
|
52 m_useNNLS(1),
|
mail@80
|
53 m_useHMM(1),
|
mail@80
|
54 sinvalues(0),
|
mail@80
|
55 cosvalues(0)
|
matthiasm@0
|
56 {
|
Chris@35
|
57 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
58
|
Chris@23
|
59 // make the *note* dictionary matrix
|
Chris@23
|
60 m_dict = new float[nNote * 84];
|
Chris@23
|
61 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
62 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
63
|
Chris@23
|
64 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
65 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
66 }
|
matthiasm@0
|
67
|
matthiasm@0
|
68
|
Chris@35
|
69 NNLSBase::~NNLSBase()
|
matthiasm@0
|
70 {
|
Chris@35
|
71 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
72 delete [] m_dict;
|
matthiasm@0
|
73 }
|
matthiasm@0
|
74
|
matthiasm@0
|
75 string
|
Chris@35
|
76 NNLSBase::getMaker() const
|
matthiasm@0
|
77 {
|
Chris@23
|
78 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
79 // Your name here
|
matthiasm@0
|
80 return "Matthias Mauch";
|
matthiasm@0
|
81 }
|
matthiasm@0
|
82
|
matthiasm@0
|
83 int
|
Chris@35
|
84 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
85 {
|
Chris@23
|
86 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
87 // Increment this each time you release a version that behaves
|
matthiasm@0
|
88 // differently from the previous one
|
matthiasm@0
|
89 return 1;
|
matthiasm@0
|
90 }
|
matthiasm@0
|
91
|
matthiasm@0
|
92 string
|
Chris@35
|
93 NNLSBase::getCopyright() const
|
matthiasm@0
|
94 {
|
Chris@23
|
95 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
96 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
97 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
98 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
99 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
100 return "GPL";
|
matthiasm@0
|
101 }
|
matthiasm@0
|
102
|
Chris@35
|
103 NNLSBase::InputDomain
|
Chris@35
|
104 NNLSBase::getInputDomain() const
|
matthiasm@0
|
105 {
|
Chris@23
|
106 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
107 return FrequencyDomain;
|
matthiasm@0
|
108 }
|
matthiasm@0
|
109
|
matthiasm@0
|
110 size_t
|
Chris@35
|
111 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
112 {
|
Chris@23
|
113 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
114 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
115 }
|
matthiasm@0
|
116
|
matthiasm@0
|
117 size_t
|
Chris@35
|
118 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
119 {
|
Chris@23
|
120 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
121 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
122 // means the same as the block size for TimeDomain
|
Chris@23
|
123 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
124 }
|
matthiasm@0
|
125
|
matthiasm@0
|
126 size_t
|
Chris@35
|
127 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
128 {
|
Chris@23
|
129 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
130 return 1;
|
matthiasm@0
|
131 }
|
matthiasm@0
|
132
|
matthiasm@0
|
133 size_t
|
Chris@35
|
134 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
135 {
|
Chris@23
|
136 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
137 return 1;
|
matthiasm@0
|
138 }
|
matthiasm@0
|
139
|
Chris@35
|
140 NNLSBase::ParameterList
|
Chris@35
|
141 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
142 {
|
Chris@23
|
143 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
144 ParameterList list;
|
matthiasm@0
|
145
|
matthiasm@42
|
146 ParameterDescriptor d;
|
matthiasm@42
|
147 d.identifier = "useNNLS";
|
matthiasm@42
|
148 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
149 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
150 d.unit = "";
|
matthiasm@42
|
151 d.minValue = 0.0;
|
matthiasm@42
|
152 d.maxValue = 1.0;
|
matthiasm@42
|
153 d.defaultValue = 1.0;
|
matthiasm@42
|
154 d.isQuantized = true;
|
matthiasm@42
|
155 d.quantizeStep = 1.0;
|
matthiasm@42
|
156 list.push_back(d);
|
matthiasm@42
|
157
|
mail@41
|
158 ParameterDescriptor d0;
|
mail@41
|
159 d0.identifier = "rollon";
|
mail@41
|
160 d0.name = "spectral roll-on";
|
matthiasm@58
|
161 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
162 d0.unit = "%";
|
mail@41
|
163 d0.minValue = 0;
|
matthiasm@59
|
164 d0.maxValue = 5;
|
mail@41
|
165 d0.defaultValue = 0;
|
matthiasm@48
|
166 d0.isQuantized = true;
|
matthiasm@59
|
167 d0.quantizeStep = 0.5;
|
mail@41
|
168 list.push_back(d0);
|
matthiasm@4
|
169
|
matthiasm@4
|
170 ParameterDescriptor d1;
|
matthiasm@4
|
171 d1.identifier = "tuningmode";
|
matthiasm@4
|
172 d1.name = "tuning mode";
|
matthiasm@4
|
173 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
174 d1.unit = "";
|
matthiasm@4
|
175 d1.minValue = 0;
|
matthiasm@4
|
176 d1.maxValue = 1;
|
matthiasm@4
|
177 d1.defaultValue = 0;
|
matthiasm@4
|
178 d1.isQuantized = true;
|
matthiasm@4
|
179 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
180 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
181 d1.quantizeStep = 1.0;
|
matthiasm@4
|
182 list.push_back(d1);
|
matthiasm@4
|
183
|
mail@41
|
184 ParameterDescriptor d2;
|
mail@41
|
185 d2.identifier = "whitening";
|
mail@41
|
186 d2.name = "spectral whitening";
|
mail@41
|
187 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
188 d2.unit = "";
|
mail@41
|
189 d2.isQuantized = true;
|
mail@41
|
190 d2.minValue = 0.0;
|
mail@41
|
191 d2.maxValue = 1.0;
|
mail@41
|
192 d2.defaultValue = 1.0;
|
mail@41
|
193 d2.isQuantized = false;
|
mail@41
|
194 list.push_back(d2);
|
mail@41
|
195
|
mail@41
|
196 ParameterDescriptor d3;
|
mail@41
|
197 d3.identifier = "s";
|
mail@41
|
198 d3.name = "spectral shape";
|
mail@41
|
199 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
200 d3.unit = "";
|
mail@41
|
201 d3.minValue = 0.5;
|
mail@41
|
202 d3.maxValue = 0.9;
|
mail@41
|
203 d3.defaultValue = 0.7;
|
mail@41
|
204 d3.isQuantized = false;
|
mail@41
|
205 list.push_back(d3);
|
mail@41
|
206
|
Chris@23
|
207 ParameterDescriptor d4;
|
matthiasm@12
|
208 d4.identifier = "chromanormalize";
|
matthiasm@12
|
209 d4.name = "chroma normalization";
|
matthiasm@12
|
210 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
211 d4.unit = "";
|
matthiasm@12
|
212 d4.minValue = 0;
|
matthiasm@13
|
213 d4.maxValue = 3;
|
matthiasm@12
|
214 d4.defaultValue = 0;
|
matthiasm@12
|
215 d4.isQuantized = true;
|
matthiasm@13
|
216 d4.valueNames.push_back("none");
|
matthiasm@13
|
217 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
218 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
219 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
220 d4.quantizeStep = 1.0;
|
matthiasm@12
|
221 list.push_back(d4);
|
matthiasm@4
|
222
|
matthiasm@0
|
223 return list;
|
matthiasm@0
|
224 }
|
matthiasm@0
|
225
|
matthiasm@0
|
226 float
|
Chris@35
|
227 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
228 {
|
Chris@23
|
229 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
230 if (identifier == "useNNLS") {
|
matthiasm@42
|
231 return m_useNNLS;
|
matthiasm@0
|
232 }
|
matthiasm@0
|
233
|
mail@41
|
234 if (identifier == "whitening") {
|
mail@41
|
235 return m_whitening;
|
mail@41
|
236 }
|
mail@41
|
237
|
mail@41
|
238 if (identifier == "s") {
|
mail@41
|
239 return m_s;
|
matthiasm@0
|
240 }
|
matthiasm@17
|
241
|
Chris@23
|
242 if (identifier == "rollon") {
|
matthiasm@17
|
243 return m_rollon;
|
matthiasm@17
|
244 }
|
matthiasm@0
|
245
|
matthiasm@0
|
246 if (identifier == "tuningmode") {
|
matthiasm@0
|
247 if (m_tuneLocal) {
|
matthiasm@0
|
248 return 1.0;
|
matthiasm@0
|
249 } else {
|
matthiasm@0
|
250 return 0.0;
|
matthiasm@0
|
251 }
|
matthiasm@0
|
252 }
|
Chris@23
|
253 if (identifier == "preset") {
|
Chris@23
|
254 return m_preset;
|
matthiasm@3
|
255 }
|
Chris@23
|
256 if (identifier == "chromanormalize") {
|
Chris@23
|
257 return m_doNormalizeChroma;
|
matthiasm@12
|
258 }
|
matthiasm@50
|
259
|
matthiasm@50
|
260 if (identifier == "useHMM") {
|
matthiasm@50
|
261 return m_useHMM;
|
matthiasm@50
|
262 }
|
matthiasm@50
|
263
|
matthiasm@0
|
264 return 0;
|
matthiasm@0
|
265
|
matthiasm@0
|
266 }
|
matthiasm@0
|
267
|
matthiasm@0
|
268 void
|
Chris@35
|
269 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
270 {
|
Chris@23
|
271 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
272 if (identifier == "useNNLS") {
|
matthiasm@42
|
273 m_useNNLS = (int) value;
|
matthiasm@0
|
274 }
|
matthiasm@0
|
275
|
mail@41
|
276 if (identifier == "whitening") {
|
mail@41
|
277 m_whitening = value;
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
mail@41
|
280 if (identifier == "s") {
|
mail@41
|
281 m_s = value;
|
mail@41
|
282 }
|
mail@41
|
283
|
matthiasm@50
|
284 if (identifier == "useHMM") {
|
matthiasm@50
|
285 m_useHMM = value;
|
matthiasm@50
|
286 }
|
matthiasm@50
|
287
|
matthiasm@0
|
288 if (identifier == "tuningmode") {
|
mail@60
|
289 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
290 m_tuneLocal = value;
|
matthiasm@0
|
291 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
292 }
|
matthiasm@42
|
293 // if (identifier == "preset") {
|
matthiasm@42
|
294 // m_preset = value;
|
matthiasm@42
|
295 // if (m_preset == 0.0) {
|
matthiasm@42
|
296 // m_tuneLocal = false;
|
matthiasm@42
|
297 // m_whitening = 1.0;
|
matthiasm@42
|
298 // m_dictID = 0.0;
|
matthiasm@42
|
299 // }
|
matthiasm@42
|
300 // if (m_preset == 1.0) {
|
matthiasm@42
|
301 // m_tuneLocal = false;
|
matthiasm@42
|
302 // m_whitening = 1.0;
|
matthiasm@42
|
303 // m_dictID = 1.0;
|
matthiasm@42
|
304 // }
|
matthiasm@42
|
305 // if (m_preset == 2.0) {
|
matthiasm@42
|
306 // m_tuneLocal = false;
|
matthiasm@42
|
307 // m_whitening = 0.7;
|
matthiasm@42
|
308 // m_dictID = 0.0;
|
matthiasm@42
|
309 // }
|
matthiasm@42
|
310 // }
|
Chris@23
|
311 if (identifier == "chromanormalize") {
|
Chris@23
|
312 m_doNormalizeChroma = value;
|
Chris@23
|
313 }
|
matthiasm@17
|
314
|
Chris@23
|
315 if (identifier == "rollon") {
|
Chris@23
|
316 m_rollon = value;
|
Chris@23
|
317 }
|
matthiasm@0
|
318 }
|
matthiasm@0
|
319
|
Chris@35
|
320 NNLSBase::ProgramList
|
Chris@35
|
321 NNLSBase::getPrograms() const
|
matthiasm@0
|
322 {
|
Chris@23
|
323 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
324 ProgramList list;
|
matthiasm@0
|
325
|
matthiasm@0
|
326 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
327 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
328
|
matthiasm@0
|
329 return list;
|
matthiasm@0
|
330 }
|
matthiasm@0
|
331
|
matthiasm@0
|
332 string
|
Chris@35
|
333 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
334 {
|
Chris@23
|
335 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
336 return ""; // no programs
|
matthiasm@0
|
337 }
|
matthiasm@0
|
338
|
matthiasm@0
|
339 void
|
Chris@35
|
340 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
341 {
|
Chris@23
|
342 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
343 }
|
matthiasm@0
|
344
|
matthiasm@0
|
345
|
matthiasm@0
|
346 bool
|
Chris@35
|
347 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
348 {
|
Chris@23
|
349 if (debug_on) {
|
Chris@23
|
350 cerr << "--> initialise";
|
Chris@23
|
351 }
|
matthiasm@1
|
352
|
mail@80
|
353 // make things for tuning estimation
|
mail@80
|
354 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
355 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
356 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
357 }
|
mail@80
|
358
|
mail@80
|
359
|
mail@80
|
360 // make hamming window of length 1/2 octave
|
mail@76
|
361 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
362 float hamwinsum = 0;
|
mail@76
|
363 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
364 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
365 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
366 }
|
mail@77
|
367 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
368
|
mail@80
|
369
|
mail@80
|
370 // initialise the tuning
|
mail@80
|
371 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
372 m_meanTunings.push_back(0);
|
mail@80
|
373 m_localTunings.push_back(0);
|
mail@80
|
374 }
|
mail@76
|
375
|
matthiasm@0
|
376 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
377 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
378 m_blockSize = blockSize;
|
matthiasm@0
|
379 m_stepSize = stepSize;
|
Chris@35
|
380 m_frameCount = 0;
|
mail@77
|
381 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
382 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
383 float *tempkernel;
|
matthiasm@1
|
384
|
Chris@23
|
385 tempkernel = new float[tempn];
|
matthiasm@1
|
386
|
Chris@23
|
387 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
388 m_kernelValue.clear();
|
Chris@23
|
389 m_kernelFftIndex.clear();
|
Chris@23
|
390 m_kernelNoteIndex.clear();
|
Chris@23
|
391 int countNonzero = 0;
|
Chris@23
|
392 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
393 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
394 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
395 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
396 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
397 countNonzero++;
|
Chris@23
|
398 }
|
Chris@23
|
399 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
400 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
401 }
|
Chris@23
|
402 }
|
Chris@23
|
403 }
|
Chris@23
|
404 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
405 delete [] tempkernel;
|
Chris@35
|
406 /*
|
Chris@23
|
407 ofstream myfile;
|
Chris@23
|
408 myfile.open ("matrix.txt");
|
matthiasm@3
|
409 // myfile << "Writing this to a file.\n";
|
Chris@23
|
410 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
411 myfile << m_dict[i] << endl;
|
Chris@23
|
412 }
|
matthiasm@3
|
413 myfile.close();
|
Chris@35
|
414 */
|
matthiasm@0
|
415 return true;
|
matthiasm@0
|
416 }
|
matthiasm@0
|
417
|
matthiasm@0
|
418 void
|
Chris@35
|
419 NNLSBase::reset()
|
matthiasm@0
|
420 {
|
Chris@23
|
421 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
422
|
matthiasm@0
|
423 // Clear buffers, reset stored values, etc
|
Chris@35
|
424 m_frameCount = 0;
|
matthiasm@42
|
425 // m_dictID = 0;
|
Chris@35
|
426 m_logSpectrum.clear();
|
mail@80
|
427 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
428 m_meanTunings[iBPS] = 0;
|
mail@80
|
429 m_localTunings[iBPS] = 0;
|
mail@80
|
430 }
|
Chris@23
|
431 m_localTuning.clear();
|
matthiasm@0
|
432 }
|
matthiasm@0
|
433
|
Chris@35
|
434 void
|
Chris@35
|
435 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
436 {
|
Chris@35
|
437 m_frameCount++;
|
Chris@23
|
438 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
439
|
Chris@23
|
440 const float *fbuf = inputBuffers[0];
|
Chris@23
|
441 float energysum = 0;
|
Chris@23
|
442 // make magnitude
|
Chris@23
|
443 float maxmag = -10000;
|
Chris@23
|
444 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
445 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
446 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
447 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
448 if (m_rollon > 0) {
|
Chris@23
|
449 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
450 }
|
Chris@23
|
451 }
|
matthiasm@14
|
452
|
Chris@23
|
453 float cumenergy = 0;
|
Chris@23
|
454 if (m_rollon > 0) {
|
Chris@23
|
455 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
456 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
457 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
458 else break;
|
Chris@23
|
459 }
|
Chris@23
|
460 }
|
matthiasm@17
|
461
|
Chris@23
|
462 if (maxmag < 2) {
|
Chris@23
|
463 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
464 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
465 magnitude[iBin] = 0;
|
Chris@23
|
466 }
|
Chris@23
|
467 }
|
matthiasm@4
|
468
|
Chris@23
|
469 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
470 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
471 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
472 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
473 }
|
Chris@23
|
474 int binCount = 0;
|
Chris@23
|
475 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
476 // cerr << ".";
|
Chris@23
|
477 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
478 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
479 binCount++;
|
Chris@23
|
480 }
|
Chris@23
|
481 // cerr << nm[20];
|
Chris@23
|
482 // cerr << endl;
|
matthiasm@0
|
483
|
matthiasm@0
|
484
|
Chris@35
|
485 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
486 // update means of complex tuning variables
|
mail@80
|
487 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
488
|
mail@80
|
489 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
490 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
491 float ratioOld = 0.997;
|
mail@80
|
492 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
493 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
494 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
495 }
|
matthiasm@0
|
496 }
|
matthiasm@0
|
497 // if (m_tuneLocal) {
|
Chris@23
|
498 // local tuning
|
mail@80
|
499 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
500 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
501
|
mail@80
|
502 float localTuningImag = 0;
|
mail@80
|
503 float localTuningReal = 0;
|
mail@80
|
504 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
505 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
506 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
507 }
|
mail@80
|
508
|
Chris@23
|
509 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
510 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
511
|
Chris@23
|
512 Feature f1; // logfreqspec
|
Chris@23
|
513 f1.hasTimestamp = true;
|
matthiasm@0
|
514 f1.timestamp = timestamp;
|
Chris@23
|
515 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
516 f1.values.push_back(nm[iNote]);
|
Chris@23
|
517 }
|
matthiasm@0
|
518
|
matthiasm@0
|
519 // deletes
|
matthiasm@0
|
520 delete[] magnitude;
|
matthiasm@0
|
521 delete[] nm;
|
matthiasm@0
|
522
|
Chris@35
|
523 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
524 }
|
matthiasm@0
|
525
|
Chris@35
|
526
|
Chris@35
|
527 #ifdef NOT_DEFINED
|
Chris@35
|
528
|
Chris@35
|
529 NNLSBase::FeatureSet
|
Chris@35
|
530 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
531 {
|
Chris@23
|
532 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
533 FeatureSet fsOut;
|
Chris@35
|
534 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
535 int nChord = m_chordnames.size();
|
Chris@23
|
536 //
|
Chris@23
|
537 /** Calculate Tuning
|
Chris@23
|
538 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
539 cumulative mean real and imag values)
|
Chris@23
|
540 **/
|
mail@80
|
541 float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
|
mail@80
|
542 float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
|
Chris@23
|
543 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
544 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
545 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
546 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
547
|
Chris@23
|
548 char buffer0 [50];
|
matthiasm@1
|
549
|
Chris@23
|
550 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
551
|
Chris@23
|
552 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
553
|
Chris@23
|
554 // push tuning to FeatureSet fsOut
|
Chris@23
|
555 Feature f0; // tuning
|
Chris@23
|
556 f0.hasTimestamp = true;
|
Chris@23
|
557 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
558 f0.label = buffer0;
|
Chris@23
|
559 fsOut[0].push_back(f0);
|
matthiasm@1
|
560
|
Chris@23
|
561 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
562 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
563 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
564 **/
|
Chris@23
|
565 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
566
|
Chris@23
|
567 float tempValue = 0;
|
Chris@23
|
568 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
569 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
570 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
571 int count = 0;
|
matthiasm@1
|
572
|
Chris@35
|
573 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
574 Feature f1 = *i;
|
Chris@23
|
575 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
576 f2.hasTimestamp = true;
|
Chris@23
|
577 f2.timestamp = f1.timestamp;
|
Chris@23
|
578 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
579
|
mail@60
|
580 if (m_tuneLocal == 1.0) {
|
Chris@23
|
581 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
582 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
583 }
|
matthiasm@1
|
584
|
mail@80
|
585 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
586
|
Chris@23
|
587 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@80
|
588 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
Chris@23
|
589 f2.values.push_back(tempValue);
|
Chris@23
|
590 }
|
matthiasm@1
|
591
|
Chris@23
|
592 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
593 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
594 vector<float> runningstd;
|
mail@77
|
595 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
596 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
597 }
|
Chris@23
|
598 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
599 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
600 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
601 if (runningstd[i] > 0) {
|
Chris@23
|
602 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
603 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
604 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
605 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
606 }
|
Chris@23
|
607 if (f2.values[i] < 0) {
|
Chris@23
|
608 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
609 }
|
Chris@23
|
610 }
|
Chris@23
|
611 fsOut[2].push_back(f2);
|
Chris@23
|
612 count++;
|
Chris@23
|
613 }
|
Chris@23
|
614 cerr << "done." << endl;
|
matthiasm@1
|
615
|
Chris@23
|
616 /** Semitone spectrum and chromagrams
|
Chris@23
|
617 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
618 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
619 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
620 bass and treble stacked onto each other).
|
Chris@23
|
621 **/
|
matthiasm@42
|
622 if (m_useNNLS == 0) {
|
Chris@23
|
623 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
624 } else {
|
Chris@23
|
625 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
626 }
|
matthiasm@13
|
627
|
matthiasm@1
|
628
|
Chris@23
|
629 vector<vector<float> > chordogram;
|
Chris@23
|
630 vector<vector<int> > scoreChordogram;
|
Chris@23
|
631 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
632 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
633 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
634 count = 0;
|
matthiasm@9
|
635
|
Chris@23
|
636 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
637 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
638 Feature f3; // semitone spectrum
|
Chris@23
|
639 Feature f4; // treble chromagram
|
Chris@23
|
640 Feature f5; // bass chromagram
|
Chris@23
|
641 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
642
|
Chris@23
|
643 f3.hasTimestamp = true;
|
Chris@23
|
644 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
645
|
Chris@23
|
646 f4.hasTimestamp = true;
|
Chris@23
|
647 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
648
|
Chris@23
|
649 f5.hasTimestamp = true;
|
Chris@23
|
650 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
651
|
Chris@23
|
652 f6.hasTimestamp = true;
|
Chris@23
|
653 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
654
|
mail@77
|
655 float b[nNote];
|
matthiasm@1
|
656
|
Chris@23
|
657 bool some_b_greater_zero = false;
|
Chris@23
|
658 float sumb = 0;
|
mail@77
|
659 for (int i = 0; i < nNote; i++) {
|
mail@77
|
660 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
Chris@23
|
661 b[i] = f2.values[i];
|
Chris@23
|
662 sumb += b[i];
|
Chris@23
|
663 if (b[i] > 0) {
|
Chris@23
|
664 some_b_greater_zero = true;
|
Chris@23
|
665 }
|
Chris@23
|
666 }
|
matthiasm@1
|
667
|
Chris@23
|
668 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
669
|
Chris@23
|
670 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
671 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
672 float currval;
|
Chris@23
|
673 unsigned iSemitone = 0;
|
matthiasm@1
|
674
|
Chris@23
|
675 if (some_b_greater_zero) {
|
matthiasm@42
|
676 if (m_useNNLS == 0) {
|
Chris@23
|
677 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
678 currval = 0;
|
Chris@23
|
679 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
680 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
681 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
682 f3.values.push_back(currval);
|
Chris@23
|
683 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
684 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
685 iSemitone++;
|
Chris@23
|
686 }
|
matthiasm@1
|
687
|
Chris@23
|
688 } else {
|
Chris@29
|
689 float x[84+1000];
|
Chris@23
|
690 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
691 vector<int> signifIndex;
|
Chris@23
|
692 int index=0;
|
Chris@23
|
693 sumb /= 84.0;
|
Chris@23
|
694 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
695 float currval = 0;
|
Chris@23
|
696 currval += b[iNote + 1 + -1];
|
Chris@23
|
697 currval += b[iNote + 1 + 0];
|
Chris@23
|
698 currval += b[iNote + 1 + 1];
|
Chris@23
|
699 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
700 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
701 index++;
|
Chris@23
|
702 }
|
Chris@29
|
703 float rnorm;
|
Chris@29
|
704 float w[84+1000];
|
Chris@29
|
705 float zz[84+1000];
|
Chris@23
|
706 int indx[84+1000];
|
Chris@23
|
707 int mode;
|
mail@77
|
708 int dictsize = nNote*signifIndex.size();
|
Chris@23
|
709 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
710 float *curr_dict = new float[dictsize];
|
Chris@23
|
711 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@77
|
712 for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
713 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
714 }
|
Chris@23
|
715 }
|
Chris@29
|
716 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
717 delete [] curr_dict;
|
Chris@23
|
718 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
719 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
720 // cerr << mode << endl;
|
Chris@23
|
721 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
722 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
723 }
|
Chris@23
|
724 }
|
Chris@23
|
725 }
|
matthiasm@13
|
726
|
matthiasm@10
|
727
|
matthiasm@12
|
728
|
matthiasm@13
|
729
|
Chris@23
|
730 f4.values = chroma;
|
Chris@23
|
731 f5.values = basschroma;
|
Chris@23
|
732 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
733 f6.values = chroma;
|
matthiasm@1
|
734
|
Chris@23
|
735 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
736 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
737 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
738 case 0: // should never end up here
|
Chris@23
|
739 break;
|
Chris@23
|
740 case 1:
|
Chris@23
|
741 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
742 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
743 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
744 break;
|
Chris@23
|
745 case 2:
|
Chris@23
|
746 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
747 chromanorm[0] += *it;
|
Chris@23
|
748 }
|
Chris@23
|
749 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
750 chromanorm[1] += *it;
|
Chris@23
|
751 }
|
Chris@23
|
752 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
753 chromanorm[2] += *it;
|
Chris@23
|
754 }
|
Chris@23
|
755 break;
|
Chris@23
|
756 case 3:
|
Chris@23
|
757 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
758 chromanorm[0] += pow(*it,2);
|
Chris@23
|
759 }
|
Chris@23
|
760 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
761 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
762 chromanorm[1] += pow(*it,2);
|
Chris@23
|
763 }
|
Chris@23
|
764 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
765 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
766 chromanorm[2] += pow(*it,2);
|
Chris@23
|
767 }
|
Chris@23
|
768 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
769 break;
|
Chris@23
|
770 }
|
Chris@23
|
771 if (chromanorm[0] > 0) {
|
Chris@23
|
772 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
773 f4.values[i] /= chromanorm[0];
|
Chris@23
|
774 }
|
Chris@23
|
775 }
|
Chris@23
|
776 if (chromanorm[1] > 0) {
|
Chris@23
|
777 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
778 f5.values[i] /= chromanorm[1];
|
Chris@23
|
779 }
|
Chris@23
|
780 }
|
Chris@23
|
781 if (chromanorm[2] > 0) {
|
Chris@23
|
782 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
783 f6.values[i] /= chromanorm[2];
|
Chris@23
|
784 }
|
Chris@23
|
785 }
|
matthiasm@13
|
786
|
Chris@23
|
787 }
|
matthiasm@13
|
788
|
Chris@23
|
789 // local chord estimation
|
Chris@23
|
790 vector<float> currentChordSalience;
|
Chris@23
|
791 float tempchordvalue = 0;
|
Chris@23
|
792 float sumchordvalue = 0;
|
matthiasm@9
|
793
|
Chris@23
|
794 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
795 tempchordvalue = 0;
|
Chris@23
|
796 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
797 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
798 }
|
Chris@23
|
799 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
800 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
801 }
|
Chris@23
|
802 sumchordvalue+=tempchordvalue;
|
Chris@23
|
803 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
804 }
|
Chris@23
|
805 if (sumchordvalue > 0) {
|
Chris@23
|
806 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
807 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
808 }
|
Chris@23
|
809 } else {
|
Chris@23
|
810 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
811 }
|
Chris@23
|
812 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
813
|
Chris@23
|
814 fsOut[3].push_back(f3);
|
Chris@23
|
815 fsOut[4].push_back(f4);
|
Chris@23
|
816 fsOut[5].push_back(f5);
|
Chris@23
|
817 fsOut[6].push_back(f6);
|
Chris@23
|
818 count++;
|
Chris@23
|
819 }
|
Chris@23
|
820 cerr << "done." << endl;
|
matthiasm@13
|
821
|
matthiasm@10
|
822
|
Chris@23
|
823 /* Simple chord estimation
|
Chris@23
|
824 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
825 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
826 */
|
Chris@23
|
827 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
828 count = 0;
|
Chris@23
|
829 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
830 vector<int> chordSequence;
|
Chris@23
|
831 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
832 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
833 scoreChordogram.push_back(temp);
|
Chris@23
|
834 }
|
Chris@23
|
835 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
836 int startIndex = count + 1;
|
Chris@23
|
837 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
838
|
Chris@23
|
839 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
840
|
Chris@23
|
841 vector<int> chordCandidates;
|
Chris@23
|
842 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
843 // float currsum = 0;
|
Chris@23
|
844 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
845 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
846 // }
|
Chris@23
|
847 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
848 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
849 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
850 chordCandidates.push_back(iChord);
|
Chris@23
|
851 break;
|
Chris@23
|
852 }
|
Chris@23
|
853 }
|
Chris@23
|
854 }
|
Chris@23
|
855 chordCandidates.push_back(nChord-1);
|
Chris@23
|
856 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
857
|
Chris@23
|
858 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
859 float maxindex = 0; //... and the index thereof
|
Chris@23
|
860 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
861 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
862
|
Chris@23
|
863 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
864 // now find the max values on both sides of iWF
|
Chris@23
|
865 // left side:
|
Chris@23
|
866 float maxL = 0;
|
Chris@23
|
867 unsigned maxindL = nChord-1;
|
Chris@23
|
868 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
869 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
870 float currsum = 0;
|
Chris@23
|
871 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
872 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
873 }
|
Chris@23
|
874 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
875 if (currsum > maxL) {
|
Chris@23
|
876 maxL = currsum;
|
Chris@23
|
877 maxindL = iChord;
|
Chris@23
|
878 }
|
Chris@23
|
879 }
|
Chris@23
|
880 // right side:
|
Chris@23
|
881 float maxR = 0;
|
Chris@23
|
882 unsigned maxindR = nChord-1;
|
Chris@23
|
883 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
884 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
885 float currsum = 0;
|
Chris@23
|
886 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
887 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
888 }
|
Chris@23
|
889 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
890 if (currsum > maxR) {
|
Chris@23
|
891 maxR = currsum;
|
Chris@23
|
892 maxindR = iChord;
|
Chris@23
|
893 }
|
Chris@23
|
894 }
|
Chris@23
|
895 if (maxL+maxR > maxval) {
|
Chris@23
|
896 maxval = maxL+maxR;
|
Chris@23
|
897 maxindex = iWF;
|
Chris@23
|
898 bestchordL = maxindL;
|
Chris@23
|
899 bestchordR = maxindR;
|
Chris@23
|
900 }
|
matthiasm@3
|
901
|
Chris@23
|
902 }
|
Chris@23
|
903 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
904 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
905 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
906 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
907 }
|
Chris@23
|
908 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
909 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
910 }
|
Chris@23
|
911 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
912 count++;
|
Chris@23
|
913 }
|
Chris@23
|
914 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
915 count = 0;
|
Chris@23
|
916 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
917 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
918 float maxindex = 0; //... and the index thereof
|
Chris@23
|
919 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
920 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
921 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
922 maxindex = iChord;
|
Chris@23
|
923 // cerr << iChord << endl;
|
Chris@23
|
924 }
|
Chris@23
|
925 }
|
Chris@23
|
926 chordSequence.push_back(maxindex);
|
Chris@23
|
927 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
928 count++;
|
Chris@23
|
929 }
|
Chris@23
|
930 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
931
|
matthiasm@3
|
932
|
Chris@23
|
933 // mode filter on chordSequence
|
Chris@23
|
934 count = 0;
|
Chris@23
|
935 string oldChord = "";
|
Chris@23
|
936 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
937 Feature f6 = *it;
|
Chris@23
|
938 Feature f7; // chord estimate
|
Chris@23
|
939 f7.hasTimestamp = true;
|
Chris@23
|
940 f7.timestamp = f6.timestamp;
|
Chris@23
|
941 Feature f8; // chord change value (filled from chordchange[count] below)
|
Chris@23
|
942 f8.hasTimestamp = true;
|
Chris@23
|
943 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
944
|
Chris@23
|
945 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
946 int maxChordCount = 0;
|
Chris@23
|
947 int maxChordIndex = nChord-1;
|
Chris@23
|
948 string maxChord;
|
Chris@23
|
949 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
950 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
951 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
952 chordCount[chordSequence[i]]++;
|
Chris@23
|
953 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
954 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
955 maxChordCount++;
|
Chris@23
|
956 maxChordIndex = chordSequence[i];
|
Chris@23
|
957 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
958 }
|
Chris@23
|
959 }
|
Chris@23
|
960 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
961 // cerr << maxChordIndex << endl;
|
Chris@23
|
962 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
963 // cerr << chordchange[count] << endl;
|
Chris@23
|
964 fsOut[9].push_back(f8);
|
Chris@23
|
965 if (oldChord != maxChord) {
|
Chris@23
|
966 oldChord = maxChord;
|
matthiasm@3
|
967
|
Chris@23
|
968 // char buffer1 [50];
|
Chris@23
|
969 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
970 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
971 // } else {
|
Chris@23
|
972 // sprintf(buffer1, "N");
|
Chris@23
|
973 // }
|
Chris@23
|
974 // f7.label = buffer1;
|
Chris@23
|
975 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
976 fsOut[7].push_back(f7);
|
Chris@23
|
977 }
|
Chris@23
|
978 count++;
|
Chris@23
|
979 }
|
Chris@23
|
980 Feature f7; // last chord estimate
|
Chris@23
|
981 f7.hasTimestamp = true;
|
Chris@23
|
982 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
983 f7.label = "N";
|
Chris@23
|
984 fsOut[7].push_back(f7);
|
Chris@23
|
985 cerr << "done." << endl;
|
Chris@23
|
986 // // musicity
|
Chris@23
|
987 // count = 0;
|
Chris@23
|
988 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
989 // vector<float> musicityValue;
|
Chris@23
|
990 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
991 // Feature f4 = *it;
|
Chris@23
|
992 //
|
Chris@23
|
993 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
994 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
995 // float chromasum = 0;
|
Chris@23
|
996 // float diffsum = 0;
|
Chris@23
|
997 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
998 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
999 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
1000 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
1001 // }
|
Chris@23
|
1002 // }
|
Chris@23
|
1003 // diffsum /= chromasum;
|
Chris@23
|
1004 // musicityValue.push_back(diffsum);
|
Chris@23
|
1005 // count++;
|
Chris@23
|
1006 // }
|
Chris@23
|
1007 //
|
Chris@23
|
1008 // float musicityThreshold = 0.44;
|
Chris@23
|
1009 // if (m_stepSize == 4096) {
|
Chris@23
|
1010 // musicityThreshold = 0.74;
|
Chris@23
|
1011 // }
|
Chris@23
|
1012 // if (m_stepSize == 4410) {
|
Chris@23
|
1013 // musicityThreshold = 0.77;
|
Chris@23
|
1014 // }
|
Chris@23
|
1015 //
|
Chris@23
|
1016 // count = 0;
|
Chris@23
|
1017 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1018 // Feature f4 = *it;
|
Chris@23
|
1019 // Feature f8; // musicity
|
Chris@23
|
1020 // Feature f9; // musicity segmenter
|
Chris@23
|
1021 //
|
Chris@23
|
1022 // f8.hasTimestamp = true;
|
Chris@23
|
1023 // f8.timestamp = f4.timestamp;
|
Chris@23
|
1024 // f9.hasTimestamp = true;
|
Chris@23
|
1025 // f9.timestamp = f4.timestamp;
|
Chris@23
|
1026 //
|
Chris@23
|
1027 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1028 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1029 // int musicityCount = 0;
|
Chris@23
|
1030 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
1031 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1032 // }
|
Chris@23
|
1033 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1034 //
|
Chris@23
|
1035 // if (isSpeech) {
|
Chris@23
|
1036 // if (oldlabeltype != 2) {
|
Chris@23
|
1037 // f9.label = "Speech";
|
Chris@23
|
1038 // fsOut[9].push_back(f9);
|
Chris@23
|
1039 // oldlabeltype = 2;
|
Chris@23
|
1040 // }
|
Chris@23
|
1041 // } else {
|
Chris@23
|
1042 // if (oldlabeltype != 1) {
|
Chris@23
|
1043 // f9.label = "Music";
|
Chris@23
|
1044 // fsOut[9].push_back(f9);
|
Chris@23
|
1045 // oldlabeltype = 1;
|
Chris@23
|
1046 // }
|
Chris@23
|
1047 // }
|
Chris@23
|
1048 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1049 // fsOut[8].push_back(f8);
|
Chris@23
|
1050 // count++;
|
Chris@23
|
1051 // }
|
Chris@23
|
1052 return fsOut;
|
matthiasm@0
|
1053
|
matthiasm@0
|
1054 }
|
matthiasm@0
|
1055
|
Chris@35
|
1056 #endif
|