Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Switch for the "--> functionName" trace lines that the functions in this
// file write to cerr. It is off, so those statements print nothing.
static const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
Chris@35
|
33 m_logSpectrum(0),
|
Chris@23
|
34 m_blockSize(0),
|
Chris@23
|
35 m_stepSize(0),
|
Chris@23
|
36 m_lengthOfNoteIndex(0),
|
mail@80
|
37 m_meanTunings(0),
|
mail@80
|
38 m_localTunings(0),
|
mail@41
|
39 m_whitening(1.0),
|
Chris@23
|
40 m_preset(0.0),
|
Chris@23
|
41 m_localTuning(0),
|
Chris@23
|
42 m_kernelValue(0),
|
Chris@23
|
43 m_kernelFftIndex(0),
|
Chris@23
|
44 m_kernelNoteIndex(0),
|
Chris@23
|
45 m_dict(0),
|
mail@60
|
46 m_tuneLocal(0),
|
Chris@23
|
47 m_chorddict(0),
|
Chris@23
|
48 m_chordnames(0),
|
Chris@23
|
49 m_doNormalizeChroma(0),
|
mail@60
|
50 m_rollon(0),
|
matthiasm@42
|
51 m_s(0.7),
|
matthiasm@50
|
52 m_useNNLS(1),
|
mail@80
|
53 m_useHMM(1),
|
mail@80
|
54 sinvalues(0),
|
mail@80
|
55 cosvalues(0)
|
matthiasm@0
|
56 {
|
Chris@35
|
57 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
58
|
Chris@23
|
59 // make the *note* dictionary matrix
|
Chris@23
|
60 m_dict = new float[nNote * 84];
|
Chris@23
|
61 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
62 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
63
|
Chris@23
|
64 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
65 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
66 }
|
matthiasm@0
|
67
|
matthiasm@0
|
68
|
Chris@35
|
69 NNLSBase::~NNLSBase()
|
matthiasm@0
|
70 {
|
Chris@35
|
71 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
72 delete [] m_dict;
|
matthiasm@0
|
73 }
|
matthiasm@0
|
74
|
matthiasm@0
|
75 string
|
Chris@35
|
76 NNLSBase::getMaker() const
|
matthiasm@0
|
77 {
|
Chris@23
|
78 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
79 // Your name here
|
matthiasm@0
|
80 return "Matthias Mauch";
|
matthiasm@0
|
81 }
|
matthiasm@0
|
82
|
matthiasm@0
|
83 int
|
Chris@35
|
84 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
85 {
|
Chris@23
|
86 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
87 // Increment this each time you release a version that behaves
|
matthiasm@0
|
88 // differently from the previous one
|
matthiasm@0
|
89 return 1;
|
matthiasm@0
|
90 }
|
matthiasm@0
|
91
|
matthiasm@0
|
92 string
|
Chris@35
|
93 NNLSBase::getCopyright() const
|
matthiasm@0
|
94 {
|
Chris@23
|
95 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
96 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
97 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
98 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
99 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
100 return "GPL";
|
matthiasm@0
|
101 }
|
matthiasm@0
|
102
|
Chris@35
|
103 NNLSBase::InputDomain
|
Chris@35
|
104 NNLSBase::getInputDomain() const
|
matthiasm@0
|
105 {
|
Chris@23
|
106 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
107 return FrequencyDomain;
|
matthiasm@0
|
108 }
|
matthiasm@0
|
109
|
matthiasm@0
|
110 size_t
|
Chris@35
|
111 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
112 {
|
Chris@23
|
113 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
114 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
115 }
|
matthiasm@0
|
116
|
matthiasm@0
|
117 size_t
|
Chris@35
|
118 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
119 {
|
Chris@23
|
120 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
121 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
122 // means the same as the block size for TimeDomain
|
Chris@23
|
123 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
124 }
|
matthiasm@0
|
125
|
matthiasm@0
|
126 size_t
|
Chris@35
|
127 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
128 {
|
Chris@23
|
129 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
130 return 1;
|
matthiasm@0
|
131 }
|
matthiasm@0
|
132
|
matthiasm@0
|
133 size_t
|
Chris@35
|
134 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
135 {
|
Chris@23
|
136 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
137 return 1;
|
matthiasm@0
|
138 }
|
matthiasm@0
|
139
|
Chris@35
|
140 NNLSBase::ParameterList
|
Chris@35
|
141 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
142 {
|
Chris@23
|
143 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
144 ParameterList list;
|
matthiasm@0
|
145
|
matthiasm@42
|
146 ParameterDescriptor d;
|
matthiasm@42
|
147 d.identifier = "useNNLS";
|
matthiasm@42
|
148 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
149 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
150 d.unit = "";
|
matthiasm@42
|
151 d.minValue = 0.0;
|
matthiasm@42
|
152 d.maxValue = 1.0;
|
matthiasm@42
|
153 d.defaultValue = 1.0;
|
matthiasm@42
|
154 d.isQuantized = true;
|
matthiasm@42
|
155 d.quantizeStep = 1.0;
|
matthiasm@42
|
156 list.push_back(d);
|
matthiasm@42
|
157
|
mail@41
|
158 ParameterDescriptor d0;
|
mail@41
|
159 d0.identifier = "rollon";
|
mail@41
|
160 d0.name = "spectral roll-on";
|
matthiasm@58
|
161 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
162 d0.unit = "%";
|
mail@41
|
163 d0.minValue = 0;
|
matthiasm@59
|
164 d0.maxValue = 5;
|
mail@41
|
165 d0.defaultValue = 0;
|
matthiasm@48
|
166 d0.isQuantized = true;
|
matthiasm@59
|
167 d0.quantizeStep = 0.5;
|
mail@41
|
168 list.push_back(d0);
|
matthiasm@4
|
169
|
matthiasm@4
|
170 ParameterDescriptor d1;
|
matthiasm@4
|
171 d1.identifier = "tuningmode";
|
matthiasm@4
|
172 d1.name = "tuning mode";
|
matthiasm@4
|
173 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
174 d1.unit = "";
|
matthiasm@4
|
175 d1.minValue = 0;
|
matthiasm@4
|
176 d1.maxValue = 1;
|
matthiasm@4
|
177 d1.defaultValue = 0;
|
matthiasm@4
|
178 d1.isQuantized = true;
|
matthiasm@4
|
179 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
180 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
181 d1.quantizeStep = 1.0;
|
matthiasm@4
|
182 list.push_back(d1);
|
matthiasm@4
|
183
|
mail@41
|
184 ParameterDescriptor d2;
|
mail@41
|
185 d2.identifier = "whitening";
|
mail@41
|
186 d2.name = "spectral whitening";
|
mail@41
|
187 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
188 d2.unit = "";
|
mail@41
|
189 d2.isQuantized = true;
|
mail@41
|
190 d2.minValue = 0.0;
|
mail@41
|
191 d2.maxValue = 1.0;
|
mail@41
|
192 d2.defaultValue = 1.0;
|
mail@41
|
193 d2.isQuantized = false;
|
mail@41
|
194 list.push_back(d2);
|
mail@41
|
195
|
mail@41
|
196 ParameterDescriptor d3;
|
mail@41
|
197 d3.identifier = "s";
|
mail@41
|
198 d3.name = "spectral shape";
|
mail@41
|
199 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
200 d3.unit = "";
|
mail@41
|
201 d3.minValue = 0.5;
|
mail@41
|
202 d3.maxValue = 0.9;
|
mail@41
|
203 d3.defaultValue = 0.7;
|
mail@41
|
204 d3.isQuantized = false;
|
mail@41
|
205 list.push_back(d3);
|
mail@41
|
206
|
Chris@23
|
207 ParameterDescriptor d4;
|
matthiasm@12
|
208 d4.identifier = "chromanormalize";
|
matthiasm@12
|
209 d4.name = "chroma normalization";
|
matthiasm@12
|
210 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
211 d4.unit = "";
|
matthiasm@12
|
212 d4.minValue = 0;
|
matthiasm@13
|
213 d4.maxValue = 3;
|
matthiasm@12
|
214 d4.defaultValue = 0;
|
matthiasm@12
|
215 d4.isQuantized = true;
|
matthiasm@13
|
216 d4.valueNames.push_back("none");
|
matthiasm@13
|
217 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
218 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
219 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
220 d4.quantizeStep = 1.0;
|
matthiasm@12
|
221 list.push_back(d4);
|
matthiasm@4
|
222
|
matthiasm@0
|
223 return list;
|
matthiasm@0
|
224 }
|
matthiasm@0
|
225
|
matthiasm@0
|
226 float
|
Chris@35
|
227 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
228 {
|
Chris@23
|
229 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
230 if (identifier == "useNNLS") {
|
matthiasm@42
|
231 return m_useNNLS;
|
matthiasm@0
|
232 }
|
matthiasm@0
|
233
|
mail@41
|
234 if (identifier == "whitening") {
|
mail@41
|
235 return m_whitening;
|
mail@41
|
236 }
|
mail@41
|
237
|
mail@41
|
238 if (identifier == "s") {
|
mail@41
|
239 return m_s;
|
matthiasm@0
|
240 }
|
matthiasm@17
|
241
|
Chris@23
|
242 if (identifier == "rollon") {
|
matthiasm@17
|
243 return m_rollon;
|
matthiasm@17
|
244 }
|
matthiasm@0
|
245
|
matthiasm@0
|
246 if (identifier == "tuningmode") {
|
matthiasm@0
|
247 if (m_tuneLocal) {
|
matthiasm@0
|
248 return 1.0;
|
matthiasm@0
|
249 } else {
|
matthiasm@0
|
250 return 0.0;
|
matthiasm@0
|
251 }
|
matthiasm@0
|
252 }
|
Chris@23
|
253 if (identifier == "preset") {
|
Chris@23
|
254 return m_preset;
|
matthiasm@3
|
255 }
|
Chris@23
|
256 if (identifier == "chromanormalize") {
|
Chris@23
|
257 return m_doNormalizeChroma;
|
matthiasm@12
|
258 }
|
matthiasm@50
|
259
|
matthiasm@50
|
260 if (identifier == "useHMM") {
|
matthiasm@50
|
261 return m_useHMM;
|
matthiasm@50
|
262 }
|
matthiasm@50
|
263
|
matthiasm@0
|
264 return 0;
|
matthiasm@0
|
265
|
matthiasm@0
|
266 }
|
matthiasm@0
|
267
|
matthiasm@0
|
268 void
|
Chris@35
|
269 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
270 {
|
Chris@23
|
271 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
272 if (identifier == "useNNLS") {
|
matthiasm@42
|
273 m_useNNLS = (int) value;
|
matthiasm@0
|
274 }
|
matthiasm@0
|
275
|
mail@41
|
276 if (identifier == "whitening") {
|
mail@41
|
277 m_whitening = value;
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
mail@41
|
280 if (identifier == "s") {
|
mail@41
|
281 m_s = value;
|
mail@41
|
282 }
|
mail@41
|
283
|
matthiasm@50
|
284 if (identifier == "useHMM") {
|
matthiasm@50
|
285 m_useHMM = value;
|
matthiasm@50
|
286 }
|
matthiasm@50
|
287
|
matthiasm@0
|
288 if (identifier == "tuningmode") {
|
mail@60
|
289 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
290 m_tuneLocal = value;
|
matthiasm@0
|
291 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
292 }
|
matthiasm@42
|
293 // if (identifier == "preset") {
|
matthiasm@42
|
294 // m_preset = value;
|
matthiasm@42
|
295 // if (m_preset == 0.0) {
|
matthiasm@42
|
296 // m_tuneLocal = false;
|
matthiasm@42
|
297 // m_whitening = 1.0;
|
matthiasm@42
|
298 // m_dictID = 0.0;
|
matthiasm@42
|
299 // }
|
matthiasm@42
|
300 // if (m_preset == 1.0) {
|
matthiasm@42
|
301 // m_tuneLocal = false;
|
matthiasm@42
|
302 // m_whitening = 1.0;
|
matthiasm@42
|
303 // m_dictID = 1.0;
|
matthiasm@42
|
304 // }
|
matthiasm@42
|
305 // if (m_preset == 2.0) {
|
matthiasm@42
|
306 // m_tuneLocal = false;
|
matthiasm@42
|
307 // m_whitening = 0.7;
|
matthiasm@42
|
308 // m_dictID = 0.0;
|
matthiasm@42
|
309 // }
|
matthiasm@42
|
310 // }
|
Chris@23
|
311 if (identifier == "chromanormalize") {
|
Chris@23
|
312 m_doNormalizeChroma = value;
|
Chris@23
|
313 }
|
matthiasm@17
|
314
|
Chris@23
|
315 if (identifier == "rollon") {
|
Chris@23
|
316 m_rollon = value;
|
Chris@23
|
317 }
|
matthiasm@0
|
318 }
|
matthiasm@0
|
319
|
Chris@35
|
320 NNLSBase::ProgramList
|
Chris@35
|
321 NNLSBase::getPrograms() const
|
matthiasm@0
|
322 {
|
Chris@23
|
323 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
324 ProgramList list;
|
matthiasm@0
|
325
|
matthiasm@0
|
326 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
327 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
328
|
matthiasm@0
|
329 return list;
|
matthiasm@0
|
330 }
|
matthiasm@0
|
331
|
matthiasm@0
|
332 string
|
Chris@35
|
333 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
334 {
|
Chris@23
|
335 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
336 return ""; // no programs
|
matthiasm@0
|
337 }
|
matthiasm@0
|
338
|
matthiasm@0
|
339 void
|
Chris@35
|
340 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
341 {
|
Chris@23
|
342 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
343 }
|
matthiasm@0
|
344
|
matthiasm@0
|
345
|
matthiasm@0
|
346 bool
|
Chris@35
|
347 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
348 {
|
Chris@23
|
349 if (debug_on) {
|
Chris@23
|
350 cerr << "--> initialise";
|
Chris@23
|
351 }
|
matthiasm@1
|
352
|
mail@80
|
353 // make things for tuning estimation
|
mail@80
|
354 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
355 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
356 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
357 }
|
mail@80
|
358
|
mail@80
|
359
|
mail@80
|
360 // make hamming window of length 1/2 octave
|
mail@76
|
361 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
362 float hamwinsum = 0;
|
mail@76
|
363 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
364 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
365 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
366 }
|
mail@77
|
367 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
368
|
mail@80
|
369
|
mail@80
|
370 // initialise the tuning
|
mail@80
|
371 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
372 m_meanTunings.push_back(0);
|
mail@80
|
373 m_localTunings.push_back(0);
|
mail@80
|
374 }
|
mail@76
|
375
|
matthiasm@0
|
376 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
377 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
378 m_blockSize = blockSize;
|
matthiasm@0
|
379 m_stepSize = stepSize;
|
Chris@35
|
380 m_frameCount = 0;
|
mail@77
|
381 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
382 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
383 float *tempkernel;
|
matthiasm@1
|
384
|
Chris@23
|
385 tempkernel = new float[tempn];
|
matthiasm@1
|
386
|
Chris@23
|
387 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
388 m_kernelValue.clear();
|
Chris@23
|
389 m_kernelFftIndex.clear();
|
Chris@23
|
390 m_kernelNoteIndex.clear();
|
Chris@23
|
391 int countNonzero = 0;
|
Chris@23
|
392 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
393 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
394 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
395 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
396 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
397 countNonzero++;
|
Chris@23
|
398 }
|
Chris@23
|
399 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
400 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
401 }
|
Chris@23
|
402 }
|
Chris@23
|
403 }
|
Chris@23
|
404 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
405 delete [] tempkernel;
|
Chris@35
|
406 /*
|
Chris@23
|
407 ofstream myfile;
|
Chris@23
|
408 myfile.open ("matrix.txt");
|
matthiasm@3
|
409 // myfile << "Writing this to a file.\n";
|
Chris@23
|
410 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
411 myfile << m_dict[i] << endl;
|
Chris@23
|
412 }
|
matthiasm@3
|
413 myfile.close();
|
Chris@35
|
414 */
|
matthiasm@0
|
415 return true;
|
matthiasm@0
|
416 }
|
matthiasm@0
|
417
|
matthiasm@0
|
418 void
|
Chris@35
|
419 NNLSBase::reset()
|
matthiasm@0
|
420 {
|
Chris@23
|
421 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
422
|
matthiasm@0
|
423 // Clear buffers, reset stored values, etc
|
Chris@35
|
424 m_frameCount = 0;
|
matthiasm@42
|
425 // m_dictID = 0;
|
Chris@35
|
426 m_logSpectrum.clear();
|
mail@80
|
427 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
428 m_meanTunings[iBPS] = 0;
|
mail@80
|
429 m_localTunings[iBPS] = 0;
|
mail@80
|
430 }
|
Chris@23
|
431 m_localTuning.clear();
|
matthiasm@0
|
432 }
|
matthiasm@0
|
433
|
Chris@35
|
434 void
|
Chris@35
|
435 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
436 {
|
Chris@35
|
437 m_frameCount++;
|
Chris@23
|
438 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
439
|
Chris@23
|
440 const float *fbuf = inputBuffers[0];
|
Chris@23
|
441 float energysum = 0;
|
Chris@23
|
442 // make magnitude
|
Chris@23
|
443 float maxmag = -10000;
|
Chris@23
|
444 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
445 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
446 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
447 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
448 if (m_rollon > 0) {
|
Chris@23
|
449 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
450 }
|
Chris@23
|
451 }
|
matthiasm@14
|
452
|
Chris@23
|
453 float cumenergy = 0;
|
Chris@23
|
454 if (m_rollon > 0) {
|
Chris@23
|
455 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
456 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
457 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
458 else break;
|
Chris@23
|
459 }
|
Chris@23
|
460 }
|
matthiasm@17
|
461
|
Chris@23
|
462 if (maxmag < 2) {
|
Chris@23
|
463 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
464 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
465 magnitude[iBin] = 0;
|
Chris@23
|
466 }
|
Chris@23
|
467 }
|
matthiasm@4
|
468
|
Chris@23
|
469 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
470 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
471 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
472 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
473 }
|
Chris@23
|
474 int binCount = 0;
|
Chris@23
|
475 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
476 // cerr << ".";
|
Chris@23
|
477 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
478 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
479 binCount++;
|
Chris@23
|
480 }
|
Chris@23
|
481 // cerr << nm[20];
|
Chris@23
|
482 // cerr << endl;
|
matthiasm@0
|
483
|
matthiasm@0
|
484
|
Chris@35
|
485 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
486 // update means of complex tuning variables
|
mail@80
|
487 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
488
|
mail@80
|
489 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
490 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
491 float ratioOld = 0.997;
|
mail@80
|
492 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
493 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
494 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
495 }
|
matthiasm@0
|
496 }
|
matthiasm@0
|
497 // if (m_tuneLocal) {
|
Chris@23
|
498 // local tuning
|
mail@80
|
499 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
500 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
501
|
mail@80
|
502 float localTuningImag = 0;
|
mail@80
|
503 float localTuningReal = 0;
|
mail@80
|
504 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
505 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
506 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
507 }
|
mail@80
|
508
|
Chris@23
|
509 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
510 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
511
|
Chris@23
|
512 Feature f1; // logfreqspec
|
Chris@23
|
513 f1.hasTimestamp = true;
|
matthiasm@0
|
514 f1.timestamp = timestamp;
|
Chris@23
|
515 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
516 f1.values.push_back(nm[iNote]);
|
Chris@23
|
517 }
|
matthiasm@0
|
518
|
matthiasm@0
|
519 // deletes
|
matthiasm@0
|
520 delete[] magnitude;
|
matthiasm@0
|
521 delete[] nm;
|
matthiasm@0
|
522
|
Chris@35
|
523 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
524 }
|
matthiasm@0
|
525
|
Chris@35
|
526
|
Chris@35
|
527 #ifdef NOT_DEFINED
|
Chris@35
|
528
|
Chris@35
|
529 NNLSBase::FeatureSet
|
Chris@35
|
530 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
531 {
|
mail@81
|
532 // if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
mail@81
|
533 FeatureSet fsOut;
|
mail@81
|
534 // if (m_logSpectrum.size() == 0) return fsOut;
|
mail@81
|
535 // int nChord = m_chordnames.size();
|
mail@81
|
536 // //
|
mail@81
|
537 // /** Calculate Tuning
|
mail@81
|
538 // calculate tuning from (using the angle of the complex number defined by the
|
mail@81
|
539 // cumulative mean real and imag values)
|
mail@81
|
540 // **/
|
mail@81
|
541 // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
|
mail@81
|
542 // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
|
mail@81
|
543 // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
mail@81
|
544 // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
mail@81
|
545 // int intShift = floor(normalisedtuning * 3);
|
mail@81
|
546 // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
547 //
|
mail@81
|
548 // char buffer0 [50];
|
mail@81
|
549 //
|
mail@81
|
550 // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
mail@81
|
551 //
|
mail@81
|
552 // // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
mail@81
|
553 //
|
mail@81
|
554 // // push tuning to FeatureSet fsOut
|
mail@81
|
555 // Feature f0; // tuning
|
mail@81
|
556 // f0.hasTimestamp = true;
|
mail@81
|
557 // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
mail@81
|
558 // f0.label = buffer0;
|
mail@81
|
559 // fsOut[0].push_back(f0);
|
mail@81
|
560 //
|
mail@81
|
561 // /** Tune Log-Frequency Spectrogram
|
mail@81
|
562 // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
mail@81
|
563 // perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
mail@81
|
564 // **/
|
mail@81
|
565 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
mail@81
|
566 //
|
mail@81
|
567 // float tempValue = 0;
|
mail@81
|
568 // float dbThreshold = 0; // relative to the background spectrum
|
mail@81
|
569 // float thresh = pow(10,dbThreshold/20);
|
mail@81
|
570 // // cerr << "tune local ? " << m_tuneLocal << endl;
|
mail@81
|
571 // int count = 0;
|
mail@81
|
572 //
|
mail@81
|
573 // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
mail@81
|
574 // Feature f1 = *i;
|
mail@81
|
575 // Feature f2; // tuned log-frequency spectrum
|
mail@81
|
576 // f2.hasTimestamp = true;
|
mail@81
|
577 // f2.timestamp = f1.timestamp;
|
mail@81
|
578 // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
mail@81
|
579 //
|
mail@81
|
580 // if (m_tuneLocal == 1.0) {
|
mail@81
|
581 // intShift = floor(m_localTuning[count] * 3);
|
mail@81
|
582 // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
583 // }
|
mail@81
|
584 //
|
mail@81
|
585 // // cerr << intShift << " " << floatShift << endl;
|
mail@81
|
586 //
|
mail@81
|
587 // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@81
|
588 // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
mail@81
|
589 // f2.values.push_back(tempValue);
|
mail@81
|
590 // }
|
mail@81
|
591 //
|
mail@81
|
592 // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
mail@81
|
593 // vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
mail@81
|
594 // vector<float> runningstd;
|
mail@81
|
595 // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
mail@81
|
596 // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
mail@81
|
597 // }
|
mail@81
|
598 // runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@81
|
599 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
600 // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
mail@81
|
601 // if (runningstd[i] > 0) {
|
mail@81
|
602 // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@81
|
603 // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
604 // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@81
|
605 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
606 // }
|
mail@81
|
607 // if (f2.values[i] < 0) {
|
mail@81
|
608 // cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
mail@81
|
609 // }
|
mail@81
|
610 // }
|
mail@81
|
611 // fsOut[2].push_back(f2);
|
mail@81
|
612 // count++;
|
mail@81
|
613 // }
|
mail@81
|
614 // cerr << "done." << endl;
|
mail@81
|
615 //
|
mail@81
|
616 // /** Semitone spectrum and chromagrams
|
mail@81
|
617 // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
mail@81
|
618 // is inferred using a non-negative least squares algorithm.
|
mail@81
|
619 // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
mail@81
|
620 // bass and treble stacked onto each other).
|
mail@81
|
621 // **/
|
mail@81
|
622 // if (m_useNNLS == 0) {
|
mail@81
|
623 // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
mail@81
|
624 // } else {
|
mail@81
|
625 // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
mail@81
|
626 // }
|
Chris@23
|
627 //
|
mail@81
|
628 //
|
mail@81
|
629 // vector<vector<float> > chordogram;
|
mail@81
|
630 // vector<vector<int> > scoreChordogram;
|
mail@81
|
631 // vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
mail@81
|
632 // vector<float> oldchroma = vector<float>(12,0);
|
mail@81
|
633 // vector<float> oldbasschroma = vector<float>(12,0);
|
mail@81
|
634 // count = 0;
|
mail@81
|
635 //
|
mail@81
|
636 // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
mail@81
|
637 // Feature f2 = *it; // logfreq spectrum
|
mail@81
|
638 // Feature f3; // semitone spectrum
|
mail@81
|
639 // Feature f4; // treble chromagram
|
mail@81
|
640 // Feature f5; // bass chromagram
|
mail@81
|
641 // Feature f6; // treble and bass chromagram
|
mail@81
|
642 //
|
mail@81
|
643 // f3.hasTimestamp = true;
|
mail@81
|
644 // f3.timestamp = f2.timestamp;
|
mail@81
|
645 //
|
mail@81
|
646 // f4.hasTimestamp = true;
|
mail@81
|
647 // f4.timestamp = f2.timestamp;
|
mail@81
|
648 //
|
mail@81
|
649 // f5.hasTimestamp = true;
|
mail@81
|
650 // f5.timestamp = f2.timestamp;
|
mail@81
|
651 //
|
mail@81
|
652 // f6.hasTimestamp = true;
|
mail@81
|
653 // f6.timestamp = f2.timestamp;
|
mail@81
|
654 //
|
mail@81
|
655 // float b[nNote];
|
mail@81
|
656 //
|
mail@81
|
657 // bool some_b_greater_zero = false;
|
mail@81
|
658 // float sumb = 0;
|
mail@81
|
659 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
660 // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
mail@81
|
661 // b[i] = f2.values[i];
|
mail@81
|
662 // sumb += b[i];
|
mail@81
|
663 // if (b[i] > 0) {
|
mail@81
|
664 // some_b_greater_zero = true;
|
mail@81
|
665 // }
|
mail@81
|
666 // }
|
mail@81
|
667 //
|
mail@81
|
668 // // here's where the non-negative least squares algorithm calculates the note activation x
|
mail@81
|
669 //
|
mail@81
|
670 // vector<float> chroma = vector<float>(12, 0);
|
mail@81
|
671 // vector<float> basschroma = vector<float>(12, 0);
|
mail@81
|
672 // float currval;
|
mail@81
|
673 // unsigned iSemitone = 0;
|
mail@81
|
674 //
|
mail@81
|
675 // if (some_b_greater_zero) {
|
mail@81
|
676 // if (m_useNNLS == 0) {
|
mail@81
|
677 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
678 // currval = 0;
|
mail@81
|
679 // currval += b[iNote + 1 + -1] * 0.5;
|
mail@81
|
680 // currval += b[iNote + 1 + 0] * 1.0;
|
mail@81
|
681 // currval += b[iNote + 1 + 1] * 0.5;
|
mail@81
|
682 // f3.values.push_back(currval);
|
mail@81
|
683 // chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
mail@81
|
684 // basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
mail@81
|
685 // iSemitone++;
|
mail@81
|
686 // }
|
mail@81
|
687 //
|
mail@81
|
688 // } else {
|
mail@81
|
689 // float x[84+1000];
|
mail@81
|
690 // for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
mail@81
|
691 // vector<int> signifIndex;
|
mail@81
|
692 // int index=0;
|
mail@81
|
693 // sumb /= 84.0;
|
mail@81
|
694 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
695 // float currval = 0;
|
mail@81
|
696 // currval += b[iNote + 1 + -1];
|
mail@81
|
697 // currval += b[iNote + 1 + 0];
|
mail@81
|
698 // currval += b[iNote + 1 + 1];
|
mail@81
|
699 // if (currval > 0) signifIndex.push_back(index);
|
mail@81
|
700 // f3.values.push_back(0); // fill the values, change later
|
mail@81
|
701 // index++;
|
mail@81
|
702 // }
|
mail@81
|
703 // float rnorm;
|
mail@81
|
704 // float w[84+1000];
|
mail@81
|
705 // float zz[84+1000];
|
mail@81
|
706 // int indx[84+1000];
|
mail@81
|
707 // int mode;
|
mail@81
|
708 // int dictsize = nNote*signifIndex.size();
|
mail@81
|
709 // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
mail@81
|
710 // float *curr_dict = new float[dictsize];
|
mail@81
|
711 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
712 // for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@81
|
713 // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
mail@81
|
714 // }
|
mail@81
|
715 // }
|
mail@81
|
716 // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
mail@81
|
717 // delete [] curr_dict;
|
mail@81
|
718 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
719 // f3.values[signifIndex[iNote]] = x[iNote];
|
mail@81
|
720 // // cerr << mode << endl;
|
mail@81
|
721 // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
mail@81
|
722 // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
mail@81
|
723 // }
|
mail@81
|
724 // }
|
mail@81
|
725 // }
|
mail@81
|
726 //
|
mail@81
|
727 //
|
mail@81
|
728 //
|
mail@81
|
729 //
|
mail@81
|
730 // f4.values = chroma;
|
mail@81
|
731 // f5.values = basschroma;
|
mail@81
|
732 // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack both chromas
|
mail@81
|
733 // f6.values = chroma;
|
mail@81
|
734 //
|
mail@81
|
735 // if (m_doNormalizeChroma > 0) {
|
mail@81
|
736 // vector<float> chromanorm = vector<float>(3,0);
|
mail@81
|
737 // switch (int(m_doNormalizeChroma)) {
|
mail@81
|
738 // case 0: // should never end up here
|
mail@81
|
739 // break;
|
mail@81
|
740 // case 1:
|
mail@81
|
741 // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
mail@81
|
742 // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
mail@81
|
743 // chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
mail@81
|
744 // break;
|
mail@81
|
745 // case 2:
|
mail@81
|
746 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
747 // chromanorm[0] += *it;
|
mail@81
|
748 // }
|
mail@81
|
749 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
750 // chromanorm[1] += *it;
|
mail@81
|
751 // }
|
mail@81
|
752 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
753 // chromanorm[2] += *it;
|
mail@81
|
754 // }
|
mail@81
|
755 // break;
|
mail@81
|
756 // case 3:
|
mail@81
|
757 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
758 // chromanorm[0] += pow(*it,2);
|
mail@81
|
759 // }
|
mail@81
|
760 // chromanorm[0] = sqrt(chromanorm[0]);
|
mail@81
|
761 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
762 // chromanorm[1] += pow(*it,2);
|
mail@81
|
763 // }
|
mail@81
|
764 // chromanorm[1] = sqrt(chromanorm[1]);
|
mail@81
|
765 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
766 // chromanorm[2] += pow(*it,2);
|
mail@81
|
767 // }
|
mail@81
|
768 // chromanorm[2] = sqrt(chromanorm[2]);
|
mail@81
|
769 // break;
|
mail@81
|
770 // }
|
mail@81
|
771 // if (chromanorm[0] > 0) {
|
mail@81
|
772 // for (int i = 0; i < f4.values.size(); i++) {
|
mail@81
|
773 // f4.values[i] /= chromanorm[0];
|
mail@81
|
774 // }
|
mail@81
|
775 // }
|
mail@81
|
776 // if (chromanorm[1] > 0) {
|
mail@81
|
777 // for (int i = 0; i < f5.values.size(); i++) {
|
mail@81
|
778 // f5.values[i] /= chromanorm[1];
|
mail@81
|
779 // }
|
mail@81
|
780 // }
|
mail@81
|
781 // if (chromanorm[2] > 0) {
|
mail@81
|
782 // for (int i = 0; i < f6.values.size(); i++) {
|
mail@81
|
783 // f6.values[i] /= chromanorm[2];
|
mail@81
|
784 // }
|
mail@81
|
785 // }
|
mail@81
|
786 //
|
mail@81
|
787 // }
|
mail@81
|
788 //
|
mail@81
|
789 // // local chord estimation
|
mail@81
|
790 // vector<float> currentChordSalience;
|
mail@81
|
791 // float tempchordvalue = 0;
|
mail@81
|
792 // float sumchordvalue = 0;
|
mail@81
|
793 //
|
mail@81
|
794 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
795 // tempchordvalue = 0;
|
mail@81
|
796 // for (int iBin = 0; iBin < 12; iBin++) {
|
mail@81
|
797 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
798 // }
|
mail@81
|
799 // for (int iBin = 12; iBin < 24; iBin++) {
|
mail@81
|
800 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
801 // }
|
mail@81
|
802 // sumchordvalue+=tempchordvalue;
|
mail@81
|
803 // currentChordSalience.push_back(tempchordvalue);
|
mail@81
|
804 // }
|
mail@81
|
805 // if (sumchordvalue > 0) {
|
mail@81
|
806 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
807 // currentChordSalience[iChord] /= sumchordvalue;
|
mail@81
|
808 // }
|
mail@81
|
809 // } else {
|
mail@81
|
810 // currentChordSalience[nChord-1] = 1.0;
|
mail@81
|
811 // }
|
mail@81
|
812 // chordogram.push_back(currentChordSalience);
|
mail@81
|
813 //
|
mail@81
|
814 // fsOut[3].push_back(f3);
|
mail@81
|
815 // fsOut[4].push_back(f4);
|
mail@81
|
816 // fsOut[5].push_back(f5);
|
mail@81
|
817 // fsOut[6].push_back(f6);
|
mail@81
|
818 // count++;
|
mail@81
|
819 // }
|
mail@81
|
820 // cerr << "done." << endl;
|
mail@81
|
821 //
|
mail@81
|
822 //
|
mail@81
|
823 // /* Simple chord estimation
|
mail@81
|
824 // I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
mail@81
|
825 // take the maximum. Very simple, don't do this at home...
|
mail@81
|
826 // */
|
mail@81
|
827 // cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
mail@81
|
828 // count = 0;
|
mail@81
|
829 // int halfwindowlength = m_inputSampleRate / m_stepSize;
|
mail@81
|
830 // vector<int> chordSequence;
|
mail@81
|
831 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
mail@81
|
832 // vector<int> temp = vector<int>(nChord,0);
|
mail@81
|
833 // scoreChordogram.push_back(temp);
|
mail@81
|
834 // }
|
mail@81
|
835 // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
mail@81
|
836 // int startIndex = count + 1;
|
mail@81
|
837 // int endIndex = count + 2 * halfwindowlength;
|
mail@81
|
838 //
|
mail@81
|
839 // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
mail@81
|
840 //
|
mail@81
|
841 // vector<int> chordCandidates;
|
mail@81
|
842 // for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
mail@81
|
843 // // float currsum = 0;
|
mail@81
|
844 // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
845 // // currsum += chordogram[iFrame][iChord];
|
mail@81
|
846 // // }
|
mail@81
|
847 // // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
mail@81
|
848 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
849 // if (chordogram[iFrame][iChord] > chordThreshold) {
|
mail@81
|
850 // chordCandidates.push_back(iChord);
|
mail@81
|
851 // break;
|
mail@81
|
852 // }
|
mail@81
|
853 // }
|
mail@81
|
854 // }
|
mail@81
|
855 // chordCandidates.push_back(nChord-1);
|
mail@81
|
856 // // cerr << chordCandidates.size() << endl;
|
mail@81
|
857 //
|
mail@81
|
858 // float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
mail@81
|
859 // float maxindex = 0; //... and the index thereof
|
mail@81
|
860 // unsigned bestchordL = nChord-1; // index of the best "left" chord
|
mail@81
|
861 // unsigned bestchordR = nChord-1; // index of the best "right" chord
|
mail@81
|
862 //
|
mail@81
|
863 // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
mail@81
|
864 // // now find the max values on both sides of iWF
|
mail@81
|
865 // // left side:
|
mail@81
|
866 // float maxL = 0;
|
mail@81
|
867 // unsigned maxindL = nChord-1;
|
mail@81
|
868 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
869 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
870 // float currsum = 0;
|
mail@81
|
871 // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
mail@81
|
872 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
873 // }
|
mail@81
|
874 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
875 // if (currsum > maxL) {
|
mail@81
|
876 // maxL = currsum;
|
mail@81
|
877 // maxindL = iChord;
|
mail@81
|
878 // }
|
mail@81
|
879 // }
|
mail@81
|
880 // // right side:
|
mail@81
|
881 // float maxR = 0;
|
mail@81
|
882 // unsigned maxindR = nChord-1;
|
mail@81
|
883 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
884 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
885 // float currsum = 0;
|
mail@81
|
886 // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
887 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
888 // }
|
mail@81
|
889 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
890 // if (currsum > maxR) {
|
mail@81
|
891 // maxR = currsum;
|
mail@81
|
892 // maxindR = iChord;
|
mail@81
|
893 // }
|
mail@81
|
894 // }
|
mail@81
|
895 // if (maxL+maxR > maxval) {
|
mail@81
|
896 // maxval = maxL+maxR;
|
mail@81
|
897 // maxindex = iWF;
|
mail@81
|
898 // bestchordL = maxindL;
|
mail@81
|
899 // bestchordR = maxindR;
|
mail@81
|
900 // }
|
mail@81
|
901 //
|
mail@81
|
902 // }
|
mail@81
|
903 // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
mail@81
|
904 // // add a score to every chord-frame-point that was part of a maximum
|
mail@81
|
905 // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
mail@81
|
906 // scoreChordogram[iFrame+count][bestchordL]++;
|
mail@81
|
907 // }
|
mail@81
|
908 // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
909 // scoreChordogram[iFrame+count][bestchordR]++;
|
mail@81
|
910 // }
|
mail@81
|
911 // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
mail@81
|
912 // count++;
|
mail@81
|
913 // }
|
mail@81
|
914 // // cerr << "******* agent finished *******" << endl;
|
mail@81
|
915 // count = 0;
|
mail@81
|
916 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
917 // float maxval = 0; // will be the value of the most salient chord in this frame
|
mail@81
|
918 // float maxindex = 0; //... and the index thereof
|
mail@81
|
919 // for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
920 // if (scoreChordogram[count][iChord] > maxval) {
|
mail@81
|
921 // maxval = scoreChordogram[count][iChord];
|
mail@81
|
922 // maxindex = iChord;
|
mail@81
|
923 // // cerr << iChord << endl;
|
mail@81
|
924 // }
|
mail@81
|
925 // }
|
mail@81
|
926 // chordSequence.push_back(maxindex);
|
mail@81
|
927 // // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
mail@81
|
928 // count++;
|
mail@81
|
929 // }
|
mail@81
|
930 // // cerr << "******* mode filter done *******" << endl;
|
mail@81
|
931 //
|
mail@81
|
932 //
|
mail@81
|
933 // // mode filter on chordSequence
|
mail@81
|
934 // count = 0;
|
mail@81
|
935 // string oldChord = "";
|
mail@81
|
936 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
937 // Feature f6 = *it;
|
mail@81
|
938 // Feature f7; // chord estimate
|
mail@81
|
939 // f7.hasTimestamp = true;
|
mail@81
|
940 // f7.timestamp = f6.timestamp;
|
mail@81
|
941 // Feature f8; // chord estimate
|
mail@81
|
942 // f8.hasTimestamp = true;
|
mail@81
|
943 // f8.timestamp = f6.timestamp;
|
mail@81
|
944 //
|
mail@81
|
945 // vector<int> chordCount = vector<int>(nChord,0);
|
mail@81
|
946 // int maxChordCount = 0;
|
mail@81
|
947 // int maxChordIndex = nChord-1;
|
mail@81
|
948 // string maxChord;
|
mail@81
|
949 // int startIndex = max(count - halfwindowlength/2,0);
|
mail@81
|
950 // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
mail@81
|
951 // for (int i = startIndex; i < endIndex; i++) {
|
mail@81
|
952 // chordCount[chordSequence[i]]++;
|
mail@81
|
953 // if (chordCount[chordSequence[i]] > maxChordCount) {
|
mail@81
|
954 // // cerr << "start index " << startIndex << endl;
|
mail@81
|
955 // maxChordCount++;
|
mail@81
|
956 // maxChordIndex = chordSequence[i];
|
mail@81
|
957 // maxChord = m_chordnames[maxChordIndex];
|
mail@81
|
958 // }
|
mail@81
|
959 // }
|
mail@81
|
960 // // chordSequence[count] = maxChordIndex;
|
mail@81
|
961 // // cerr << maxChordIndex << endl;
|
mail@81
|
962 // f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
mail@81
|
963 // // cerr << chordchange[count] << endl;
|
mail@81
|
964 // fsOut[9].push_back(f8);
|
mail@81
|
965 // if (oldChord != maxChord) {
|
mail@81
|
966 // oldChord = maxChord;
|
mail@81
|
967 //
|
mail@81
|
968 // // char buffer1 [50];
|
mail@81
|
969 // // if (maxChordIndex < nChord - 1) {
|
mail@81
|
970 // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
mail@81
|
971 // // } else {
|
mail@81
|
972 // // sprintf(buffer1, "N");
|
mail@81
|
973 // // }
|
mail@81
|
974 // // f7.label = buffer1;
|
mail@81
|
975 // f7.label = m_chordnames[maxChordIndex];
|
mail@81
|
976 // fsOut[7].push_back(f7);
|
mail@81
|
977 // }
|
mail@81
|
978 // count++;
|
mail@81
|
979 // }
|
mail@81
|
980 // Feature f7; // last chord estimate
|
mail@81
|
981 // f7.hasTimestamp = true;
|
mail@81
|
982 // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
mail@81
|
983 // f7.label = "N";
|
mail@81
|
984 // fsOut[7].push_back(f7);
|
mail@81
|
985 // cerr << "done." << endl;
|
mail@81
|
986 // // // musicity
|
mail@81
|
987 // // count = 0;
|
mail@81
|
988 // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
mail@81
|
989 // // vector<float> musicityValue;
|
mail@81
|
990 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
991 // // Feature f4 = *it;
|
mail@81
|
992 // //
|
mail@81
|
993 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
994 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
995 // // float chromasum = 0;
|
mail@81
|
996 // // float diffsum = 0;
|
mail@81
|
997 // // for (int k = 0; k < 12; k++) {
|
mail@81
|
998 // // for (int i = startIndex + 1; i < endIndex; i++) {
|
mail@81
|
999 // // chromasum += pow(fsOut[4][i].values[k],2);
|
mail@81
|
1000 // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
mail@81
|
1001 // // }
|
mail@81
|
1002 // // }
|
mail@81
|
1003 // // diffsum /= chromasum;
|
mail@81
|
1004 // // musicityValue.push_back(diffsum);
|
mail@81
|
1005 // // count++;
|
mail@81
|
1006 // // }
|
mail@81
|
1007 // //
|
mail@81
|
1008 // // float musicityThreshold = 0.44;
|
mail@81
|
1009 // // if (m_stepSize == 4096) {
|
mail@81
|
1010 // // musicityThreshold = 0.74;
|
mail@81
|
1011 // // }
|
mail@81
|
1012 // // if (m_stepSize == 4410) {
|
mail@81
|
1013 // // musicityThreshold = 0.77;
|
mail@81
|
1014 // // }
|
mail@81
|
1015 // //
|
mail@81
|
1016 // // count = 0;
|
mail@81
|
1017 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
1018 // // Feature f4 = *it;
|
mail@81
|
1019 // // Feature f8; // musicity
|
mail@81
|
1020 // // Feature f9; // musicity segmenter
|
mail@81
|
1021 // //
|
mail@81
|
1022 // // f8.hasTimestamp = true;
|
mail@81
|
1023 // // f8.timestamp = f4.timestamp;
|
mail@81
|
1024 // // f9.hasTimestamp = true;
|
mail@81
|
1025 // // f9.timestamp = f4.timestamp;
|
mail@81
|
1026 // //
|
mail@81
|
1027 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
1028 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
1029 // // int musicityCount = 0;
|
mail@81
|
1030 // // for (int i = startIndex; i <= endIndex; i++) {
|
mail@81
|
1031 // // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
mail@81
|
1032 // // }
|
mail@81
|
1033 // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
mail@81
|
1034 // //
|
mail@81
|
1035 // // if (isSpeech) {
|
mail@81
|
1036 // // if (oldlabeltype != 2) {
|
mail@81
|
1037 // // f9.label = "Speech";
|
mail@81
|
1038 // // fsOut[9].push_back(f9);
|
mail@81
|
1039 // // oldlabeltype = 2;
|
mail@81
|
1040 // // }
|
mail@81
|
1041 // // } else {
|
mail@81
|
1042 // // if (oldlabeltype != 1) {
|
mail@81
|
1043 // // f9.label = "Music";
|
mail@81
|
1044 // // fsOut[9].push_back(f9);
|
mail@81
|
1045 // // oldlabeltype = 1;
|
mail@81
|
1046 // // }
|
mail@81
|
1047 // // }
|
mail@81
|
1048 // // f8.values.push_back(musicityValue[count]);
|
mail@81
|
1049 // // fsOut[8].push_back(f8);
|
mail@81
|
1050 // // count++;
|
mail@81
|
1051 // // }
|
Chris@23
|
1052 return fsOut;
|
matthiasm@0
|
1053
|
matthiasm@0
|
1054 }
|
matthiasm@0
|
1055
|
Chris@35
|
1056 #endif
|