Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
29 const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@27
|
31 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
32
|
Chris@35
|
33 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
34 Plugin(inputSampleRate),
|
Chris@35
|
35 m_logSpectrum(0),
|
Chris@23
|
36 m_blockSize(0),
|
Chris@23
|
37 m_stepSize(0),
|
Chris@23
|
38 m_lengthOfNoteIndex(0),
|
Chris@23
|
39 m_meanTuning0(0),
|
Chris@23
|
40 m_meanTuning1(0),
|
Chris@23
|
41 m_meanTuning2(0),
|
Chris@23
|
42 m_localTuning0(0),
|
Chris@23
|
43 m_localTuning1(0),
|
Chris@23
|
44 m_localTuning2(0),
|
mail@41
|
45 m_whitening(1.0),
|
Chris@23
|
46 m_preset(0.0),
|
Chris@23
|
47 m_localTuning(0),
|
Chris@23
|
48 m_kernelValue(0),
|
Chris@23
|
49 m_kernelFftIndex(0),
|
Chris@23
|
50 m_kernelNoteIndex(0),
|
Chris@23
|
51 m_dict(0),
|
Chris@23
|
52 m_tuneLocal(false),
|
Chris@23
|
53 m_dictID(0),
|
Chris@23
|
54 m_chorddict(0),
|
Chris@23
|
55 m_chordnames(0),
|
Chris@23
|
56 m_doNormalizeChroma(0),
|
mail@41
|
57 m_rollon(0.0),
|
mail@41
|
58 m_s(0.7)
|
matthiasm@0
|
59 {
|
Chris@35
|
60 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
61
|
Chris@23
|
62 // make the *note* dictionary matrix
|
Chris@23
|
63 m_dict = new float[nNote * 84];
|
Chris@23
|
64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
65 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@7
|
66
|
Chris@23
|
67 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
68 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70
|
matthiasm@0
|
71
|
Chris@35
|
72 NNLSBase::~NNLSBase()
|
matthiasm@0
|
73 {
|
Chris@35
|
74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
75 delete [] m_dict;
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 string
|
Chris@35
|
79 NNLSBase::getMaker() const
|
matthiasm@0
|
80 {
|
Chris@23
|
81 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
82 // Your name here
|
matthiasm@0
|
83 return "Matthias Mauch";
|
matthiasm@0
|
84 }
|
matthiasm@0
|
85
|
matthiasm@0
|
86 int
|
Chris@35
|
87 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
88 {
|
Chris@23
|
89 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
90 // Increment this each time you release a version that behaves
|
matthiasm@0
|
91 // differently from the previous one
|
matthiasm@0
|
92 return 1;
|
matthiasm@0
|
93 }
|
matthiasm@0
|
94
|
matthiasm@0
|
95 string
|
Chris@35
|
96 NNLSBase::getCopyright() const
|
matthiasm@0
|
97 {
|
Chris@23
|
98 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
99 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
100 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
101 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
102 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
103 return "GPL";
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105
|
Chris@35
|
106 NNLSBase::InputDomain
|
Chris@35
|
107 NNLSBase::getInputDomain() const
|
matthiasm@0
|
108 {
|
Chris@23
|
109 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
110 return FrequencyDomain;
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 size_t
|
Chris@35
|
114 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
115 {
|
Chris@23
|
116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
117 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 size_t
|
Chris@35
|
121 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
122 {
|
Chris@23
|
123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
124 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
125 // means the same as the block size for TimeDomain
|
Chris@23
|
126 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
127 }
|
matthiasm@0
|
128
|
matthiasm@0
|
129 size_t
|
Chris@35
|
130 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
131 {
|
Chris@23
|
132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
133 return 1;
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
matthiasm@0
|
136 size_t
|
Chris@35
|
137 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
138 {
|
Chris@23
|
139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
140 return 1;
|
matthiasm@0
|
141 }
|
matthiasm@0
|
142
|
Chris@35
|
143 NNLSBase::ParameterList
|
Chris@35
|
144 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
145 {
|
Chris@23
|
146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
147 ParameterList list;
|
matthiasm@0
|
148
|
mail@41
|
149 ParameterDescriptor d0;
|
mail@41
|
150 d0.identifier = "rollon";
|
mail@41
|
151 d0.name = "spectral roll-on";
|
mail@41
|
152 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
mail@41
|
153 d0.unit = "";
|
mail@41
|
154 d0.minValue = 0;
|
mail@41
|
155 d0.maxValue = 0.05;
|
mail@41
|
156 d0.defaultValue = 0;
|
mail@41
|
157 d0.isQuantized = false;
|
mail@41
|
158 list.push_back(d0);
|
matthiasm@4
|
159
|
matthiasm@4
|
160 ParameterDescriptor d1;
|
matthiasm@4
|
161 d1.identifier = "tuningmode";
|
matthiasm@4
|
162 d1.name = "tuning mode";
|
matthiasm@4
|
163 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
164 d1.unit = "";
|
matthiasm@4
|
165 d1.minValue = 0;
|
matthiasm@4
|
166 d1.maxValue = 1;
|
matthiasm@4
|
167 d1.defaultValue = 0;
|
matthiasm@4
|
168 d1.isQuantized = true;
|
matthiasm@4
|
169 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
170 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
171 d1.quantizeStep = 1.0;
|
matthiasm@4
|
172 list.push_back(d1);
|
matthiasm@4
|
173
|
mail@41
|
174 ParameterDescriptor d2;
|
mail@41
|
175 d2.identifier = "whitening";
|
mail@41
|
176 d2.name = "spectral whitening";
|
mail@41
|
177 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
178 d2.unit = "";
|
mail@41
|
179 d2.isQuantized = true;
|
mail@41
|
180 d2.minValue = 0.0;
|
mail@41
|
181 d2.maxValue = 1.0;
|
mail@41
|
182 d2.defaultValue = 1.0;
|
mail@41
|
183 d2.isQuantized = false;
|
mail@41
|
184 list.push_back(d2);
|
mail@41
|
185
|
mail@41
|
186 ParameterDescriptor d3;
|
mail@41
|
187 d3.identifier = "s";
|
mail@41
|
188 d3.name = "spectral shape";
|
mail@41
|
189 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
190 d3.unit = "";
|
mail@41
|
191 d3.minValue = 0.5;
|
mail@41
|
192 d3.maxValue = 0.9;
|
mail@41
|
193 d3.defaultValue = 0.7;
|
mail@41
|
194 d3.isQuantized = false;
|
mail@41
|
195 list.push_back(d3);
|
mail@41
|
196
|
Chris@23
|
197 ParameterDescriptor d4;
|
matthiasm@12
|
198 d4.identifier = "chromanormalize";
|
matthiasm@12
|
199 d4.name = "chroma normalization";
|
matthiasm@12
|
200 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
201 d4.unit = "";
|
matthiasm@12
|
202 d4.minValue = 0;
|
matthiasm@13
|
203 d4.maxValue = 3;
|
matthiasm@12
|
204 d4.defaultValue = 0;
|
matthiasm@12
|
205 d4.isQuantized = true;
|
matthiasm@13
|
206 d4.valueNames.push_back("none");
|
matthiasm@13
|
207 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
208 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
209 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
210 d4.quantizeStep = 1.0;
|
matthiasm@12
|
211 list.push_back(d4);
|
matthiasm@4
|
212
|
matthiasm@0
|
213 return list;
|
matthiasm@0
|
214 }
|
matthiasm@0
|
215
|
matthiasm@0
|
216 float
|
Chris@35
|
217 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
218 {
|
Chris@23
|
219 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
220 if (identifier == "notedict") {
|
matthiasm@0
|
221 return m_dictID;
|
matthiasm@0
|
222 }
|
matthiasm@0
|
223
|
mail@41
|
224 if (identifier == "whitening") {
|
mail@41
|
225 return m_whitening;
|
mail@41
|
226 }
|
mail@41
|
227
|
mail@41
|
228 if (identifier == "s") {
|
mail@41
|
229 return m_s;
|
matthiasm@0
|
230 }
|
matthiasm@17
|
231
|
Chris@23
|
232 if (identifier == "rollon") {
|
matthiasm@17
|
233 return m_rollon;
|
matthiasm@17
|
234 }
|
matthiasm@0
|
235
|
matthiasm@0
|
236 if (identifier == "tuningmode") {
|
matthiasm@0
|
237 if (m_tuneLocal) {
|
matthiasm@0
|
238 return 1.0;
|
matthiasm@0
|
239 } else {
|
matthiasm@0
|
240 return 0.0;
|
matthiasm@0
|
241 }
|
matthiasm@0
|
242 }
|
Chris@23
|
243 if (identifier == "preset") {
|
Chris@23
|
244 return m_preset;
|
matthiasm@3
|
245 }
|
Chris@23
|
246 if (identifier == "chromanormalize") {
|
Chris@23
|
247 return m_doNormalizeChroma;
|
matthiasm@12
|
248 }
|
matthiasm@0
|
249 return 0;
|
matthiasm@0
|
250
|
matthiasm@0
|
251 }
|
matthiasm@0
|
252
|
matthiasm@0
|
253 void
|
Chris@35
|
254 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
255 {
|
Chris@23
|
256 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
257 if (identifier == "notedict") {
|
matthiasm@0
|
258 m_dictID = (int) value;
|
matthiasm@0
|
259 }
|
matthiasm@0
|
260
|
mail@41
|
261 if (identifier == "whitening") {
|
mail@41
|
262 m_whitening = value;
|
matthiasm@0
|
263 }
|
matthiasm@0
|
264
|
mail@41
|
265 if (identifier == "s") {
|
mail@41
|
266 m_s = value;
|
mail@41
|
267 }
|
mail@41
|
268
|
matthiasm@0
|
269 if (identifier == "tuningmode") {
|
matthiasm@0
|
270 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
271 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
272 }
|
matthiasm@3
|
273 if (identifier == "preset") {
|
matthiasm@3
|
274 m_preset = value;
|
Chris@23
|
275 if (m_preset == 0.0) {
|
Chris@23
|
276 m_tuneLocal = false;
|
mail@41
|
277 m_whitening = 1.0;
|
Chris@23
|
278 m_dictID = 0.0;
|
Chris@23
|
279 }
|
Chris@23
|
280 if (m_preset == 1.0) {
|
Chris@23
|
281 m_tuneLocal = false;
|
mail@41
|
282 m_whitening = 1.0;
|
Chris@23
|
283 m_dictID = 1.0;
|
Chris@23
|
284 }
|
Chris@23
|
285 if (m_preset == 2.0) {
|
Chris@23
|
286 m_tuneLocal = false;
|
mail@41
|
287 m_whitening = 0.7;
|
Chris@23
|
288 m_dictID = 0.0;
|
Chris@23
|
289 }
|
matthiasm@3
|
290 }
|
Chris@23
|
291 if (identifier == "chromanormalize") {
|
Chris@23
|
292 m_doNormalizeChroma = value;
|
Chris@23
|
293 }
|
matthiasm@17
|
294
|
Chris@23
|
295 if (identifier == "rollon") {
|
Chris@23
|
296 m_rollon = value;
|
Chris@23
|
297 }
|
matthiasm@0
|
298 }
|
matthiasm@0
|
299
|
Chris@35
|
300 NNLSBase::ProgramList
|
Chris@35
|
301 NNLSBase::getPrograms() const
|
matthiasm@0
|
302 {
|
Chris@23
|
303 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
304 ProgramList list;
|
matthiasm@0
|
305
|
matthiasm@0
|
306 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
307 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
308
|
matthiasm@0
|
309 return list;
|
matthiasm@0
|
310 }
|
matthiasm@0
|
311
|
matthiasm@0
|
312 string
|
Chris@35
|
313 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
314 {
|
Chris@23
|
315 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
316 return ""; // no programs
|
matthiasm@0
|
317 }
|
matthiasm@0
|
318
|
matthiasm@0
|
319 void
|
Chris@35
|
320 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
321 {
|
Chris@23
|
322 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
323 }
|
matthiasm@0
|
324
|
matthiasm@0
|
325
|
matthiasm@0
|
326 bool
|
Chris@35
|
327 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
328 {
|
Chris@23
|
329 if (debug_on) {
|
Chris@23
|
330 cerr << "--> initialise";
|
Chris@23
|
331 }
|
matthiasm@1
|
332
|
matthiasm@0
|
333 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
334 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
335 m_blockSize = blockSize;
|
matthiasm@0
|
336 m_stepSize = stepSize;
|
Chris@35
|
337 m_frameCount = 0;
|
Chris@23
|
338 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
339 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
340 float *tempkernel;
|
matthiasm@1
|
341
|
Chris@23
|
342 tempkernel = new float[tempn];
|
matthiasm@1
|
343
|
Chris@23
|
344 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
345 m_kernelValue.clear();
|
Chris@23
|
346 m_kernelFftIndex.clear();
|
Chris@23
|
347 m_kernelNoteIndex.clear();
|
Chris@23
|
348 int countNonzero = 0;
|
Chris@23
|
349 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
350 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
351 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
352 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
353 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
354 countNonzero++;
|
Chris@23
|
355 }
|
Chris@23
|
356 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
357 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
358 }
|
Chris@23
|
359 }
|
Chris@23
|
360 }
|
Chris@23
|
361 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
362 delete [] tempkernel;
|
Chris@35
|
363 /*
|
Chris@23
|
364 ofstream myfile;
|
Chris@23
|
365 myfile.open ("matrix.txt");
|
matthiasm@3
|
366 // myfile << "Writing this to a file.\n";
|
Chris@23
|
367 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
368 myfile << m_dict[i] << endl;
|
Chris@23
|
369 }
|
matthiasm@3
|
370 myfile.close();
|
Chris@35
|
371 */
|
matthiasm@0
|
372 return true;
|
matthiasm@0
|
373 }
|
matthiasm@0
|
374
|
matthiasm@0
|
375 void
|
Chris@35
|
376 NNLSBase::reset()
|
matthiasm@0
|
377 {
|
Chris@23
|
378 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
379
|
matthiasm@0
|
380 // Clear buffers, reset stored values, etc
|
Chris@35
|
381 m_frameCount = 0;
|
Chris@23
|
382 m_dictID = 0;
|
Chris@35
|
383 m_logSpectrum.clear();
|
Chris@23
|
384 m_meanTuning0 = 0;
|
Chris@23
|
385 m_meanTuning1 = 0;
|
Chris@23
|
386 m_meanTuning2 = 0;
|
Chris@23
|
387 m_localTuning0 = 0;
|
Chris@23
|
388 m_localTuning1 = 0;
|
Chris@23
|
389 m_localTuning2 = 0;
|
Chris@23
|
390 m_localTuning.clear();
|
matthiasm@0
|
391 }
|
matthiasm@0
|
392
|
Chris@35
|
393 void
|
Chris@35
|
394 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
395 {
|
Chris@35
|
396 m_frameCount++;
|
Chris@23
|
397 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
398
|
Chris@23
|
399 const float *fbuf = inputBuffers[0];
|
Chris@23
|
400 float energysum = 0;
|
Chris@23
|
401 // make magnitude
|
Chris@23
|
402 float maxmag = -10000;
|
Chris@23
|
403 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
404 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
405 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
406 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
407 if (m_rollon > 0) {
|
Chris@23
|
408 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
409 }
|
Chris@23
|
410 }
|
matthiasm@14
|
411
|
Chris@23
|
412 float cumenergy = 0;
|
Chris@23
|
413 if (m_rollon > 0) {
|
Chris@23
|
414 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
415 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
416 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
417 else break;
|
Chris@23
|
418 }
|
Chris@23
|
419 }
|
matthiasm@17
|
420
|
Chris@23
|
421 if (maxmag < 2) {
|
Chris@23
|
422 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
423 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
424 magnitude[iBin] = 0;
|
Chris@23
|
425 }
|
Chris@23
|
426 }
|
matthiasm@4
|
427
|
Chris@23
|
428 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
429 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
430 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
431 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
432 }
|
Chris@23
|
433 int binCount = 0;
|
Chris@23
|
434 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
435 // cerr << ".";
|
Chris@23
|
436 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
437 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
438 binCount++;
|
Chris@23
|
439 }
|
Chris@23
|
440 // cerr << nm[20];
|
Chris@23
|
441 // cerr << endl;
|
matthiasm@0
|
442
|
matthiasm@0
|
443
|
Chris@35
|
444 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
445 // update means of complex tuning variables
|
Chris@35
|
446 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
447 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
|
Chris@35
|
448 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
|
matthiasm@0
|
449
|
matthiasm@0
|
450 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
451 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
452 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
453 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
454 float ratioOld = 0.997;
|
matthiasm@3
|
455 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
456 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
457 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
458 }
|
matthiasm@0
|
459
|
matthiasm@0
|
460 // if (m_tuneLocal) {
|
Chris@23
|
461 // local tuning
|
Chris@23
|
462 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
463 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
464 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
465 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
466
|
Chris@23
|
467 Feature f1; // logfreqspec
|
Chris@23
|
468 f1.hasTimestamp = true;
|
matthiasm@0
|
469 f1.timestamp = timestamp;
|
Chris@23
|
470 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
471 f1.values.push_back(nm[iNote]);
|
Chris@23
|
472 }
|
matthiasm@0
|
473
|
matthiasm@0
|
474 // deletes
|
matthiasm@0
|
475 delete[] magnitude;
|
matthiasm@0
|
476 delete[] nm;
|
matthiasm@0
|
477
|
Chris@35
|
478 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
479 }
|
matthiasm@0
|
480
|
Chris@35
|
481
|
Chris@35
|
482 #ifdef NOT_DEFINED
|
Chris@35
|
483
|
Chris@35
|
484 NNLSBase::FeatureSet
|
Chris@35
|
485 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
486 {
|
Chris@23
|
487 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
488 FeatureSet fsOut;
|
Chris@35
|
489 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
490 int nChord = m_chordnames.size();
|
Chris@23
|
491 //
|
Chris@23
|
492 /** Calculate Tuning
|
Chris@23
|
493 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
494 cumulative mean real and imag values)
|
Chris@23
|
495 **/
|
Chris@23
|
496 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
497 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
498 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
499 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
500 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
501 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
502
|
Chris@23
|
503 char buffer0 [50];
|
matthiasm@1
|
504
|
Chris@23
|
505 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
506
|
Chris@23
|
507 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
508
|
Chris@23
|
509 // push tuning to FeatureSet fsOut
|
Chris@23
|
510 Feature f0; // tuning
|
Chris@23
|
511 f0.hasTimestamp = true;
|
Chris@23
|
512 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
513 f0.label = buffer0;
|
Chris@23
|
514 fsOut[0].push_back(f0);
|
matthiasm@1
|
515
|
Chris@23
|
516 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
517 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
518 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
519 **/
|
Chris@23
|
520 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
521
|
Chris@23
|
522 float tempValue = 0;
|
Chris@23
|
523 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
524 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
525 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
526 int count = 0;
|
matthiasm@1
|
527
|
Chris@35
|
528 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
529 Feature f1 = *i;
|
Chris@23
|
530 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
531 f2.hasTimestamp = true;
|
Chris@23
|
532 f2.timestamp = f1.timestamp;
|
Chris@23
|
533 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
534
|
Chris@23
|
535 if (m_tuneLocal) {
|
Chris@23
|
536 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
537 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
538 }
|
matthiasm@1
|
539
|
Chris@23
|
540 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
541
|
Chris@23
|
542 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
543 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
544 f2.values.push_back(tempValue);
|
Chris@23
|
545 }
|
matthiasm@1
|
546
|
Chris@23
|
547 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
548 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
549 vector<float> runningstd;
|
Chris@23
|
550 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
551 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
552 }
|
Chris@23
|
553 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
554 for (int i = 0; i < 256; i++) {
|
Chris@23
|
555 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
556 if (runningstd[i] > 0) {
|
Chris@23
|
557 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
558 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
559 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
560 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
561 }
|
Chris@23
|
562 if (f2.values[i] < 0) {
|
Chris@23
|
563 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
564 }
|
Chris@23
|
565 }
|
Chris@23
|
566 fsOut[2].push_back(f2);
|
Chris@23
|
567 count++;
|
Chris@23
|
568 }
|
Chris@23
|
569 cerr << "done." << endl;
|
matthiasm@1
|
570
|
Chris@23
|
571 /** Semitone spectrum and chromagrams
|
Chris@23
|
572 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
573 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
574 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
575 bass and treble stacked onto each other).
|
Chris@23
|
576 **/
|
Chris@23
|
577 if (m_dictID == 1) {
|
Chris@23
|
578 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
579 } else {
|
Chris@23
|
580 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
581 }
|
matthiasm@13
|
582
|
matthiasm@1
|
583
|
Chris@23
|
584 vector<vector<float> > chordogram;
|
Chris@23
|
585 vector<vector<int> > scoreChordogram;
|
Chris@23
|
586 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
587 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
588 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
589 count = 0;
|
matthiasm@9
|
590
|
Chris@23
|
591 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
592 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
593 Feature f3; // semitone spectrum
|
Chris@23
|
594 Feature f4; // treble chromagram
|
Chris@23
|
595 Feature f5; // bass chromagram
|
Chris@23
|
596 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
597
|
Chris@23
|
598 f3.hasTimestamp = true;
|
Chris@23
|
599 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
600
|
Chris@23
|
601 f4.hasTimestamp = true;
|
Chris@23
|
602 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
603
|
Chris@23
|
604 f5.hasTimestamp = true;
|
Chris@23
|
605 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
606
|
Chris@23
|
607 f6.hasTimestamp = true;
|
Chris@23
|
608 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
609
|
Chris@29
|
610 float b[256];
|
matthiasm@1
|
611
|
Chris@23
|
612 bool some_b_greater_zero = false;
|
Chris@23
|
613 float sumb = 0;
|
Chris@23
|
614 for (int i = 0; i < 256; i++) {
|
Chris@23
|
615 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
616 b[i] = f2.values[i];
|
Chris@23
|
617 sumb += b[i];
|
Chris@23
|
618 if (b[i] > 0) {
|
Chris@23
|
619 some_b_greater_zero = true;
|
Chris@23
|
620 }
|
Chris@23
|
621 }
|
matthiasm@1
|
622
|
Chris@23
|
623 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
624
|
Chris@23
|
625 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
626 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
627 float currval;
|
Chris@23
|
628 unsigned iSemitone = 0;
|
matthiasm@1
|
629
|
Chris@23
|
630 if (some_b_greater_zero) {
|
Chris@23
|
631 if (m_dictID == 1) {
|
Chris@23
|
632 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
633 currval = 0;
|
Chris@23
|
634 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
635 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
636 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
637 f3.values.push_back(currval);
|
Chris@23
|
638 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
639 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
640 iSemitone++;
|
Chris@23
|
641 }
|
matthiasm@1
|
642
|
Chris@23
|
643 } else {
|
Chris@29
|
644 float x[84+1000];
|
Chris@23
|
645 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
646 vector<int> signifIndex;
|
Chris@23
|
647 int index=0;
|
Chris@23
|
648 sumb /= 84.0;
|
Chris@23
|
649 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
650 float currval = 0;
|
Chris@23
|
651 currval += b[iNote + 1 + -1];
|
Chris@23
|
652 currval += b[iNote + 1 + 0];
|
Chris@23
|
653 currval += b[iNote + 1 + 1];
|
Chris@23
|
654 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
655 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
656 index++;
|
Chris@23
|
657 }
|
Chris@29
|
658 float rnorm;
|
Chris@29
|
659 float w[84+1000];
|
Chris@29
|
660 float zz[84+1000];
|
Chris@23
|
661 int indx[84+1000];
|
Chris@23
|
662 int mode;
|
Chris@23
|
663 int dictsize = 256*signifIndex.size();
|
Chris@23
|
664 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
665 float *curr_dict = new float[dictsize];
|
Chris@23
|
666 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
667 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
668 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
669 }
|
Chris@23
|
670 }
|
Chris@29
|
671 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
672 delete [] curr_dict;
|
Chris@23
|
673 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
674 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
675 // cerr << mode << endl;
|
Chris@23
|
676 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
677 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
678 }
|
Chris@23
|
679 }
|
Chris@23
|
680 }
|
matthiasm@13
|
681
|
matthiasm@10
|
682
|
matthiasm@12
|
683
|
matthiasm@13
|
684
|
Chris@23
|
685 f4.values = chroma;
|
Chris@23
|
686 f5.values = basschroma;
|
Chris@23
|
687 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
688 f6.values = chroma;
|
matthiasm@1
|
689
|
Chris@23
|
690 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
691 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
692 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
693 case 0: // should never end up here
|
Chris@23
|
694 break;
|
Chris@23
|
695 case 1:
|
Chris@23
|
696 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
697 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
698 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
699 break;
|
Chris@23
|
700 case 2:
|
Chris@23
|
701 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
702 chromanorm[0] += *it;
|
Chris@23
|
703 }
|
Chris@23
|
704 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
705 chromanorm[1] += *it;
|
Chris@23
|
706 }
|
Chris@23
|
707 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
708 chromanorm[2] += *it;
|
Chris@23
|
709 }
|
Chris@23
|
710 break;
|
Chris@23
|
711 case 3:
|
Chris@23
|
712 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
713 chromanorm[0] += pow(*it,2);
|
Chris@23
|
714 }
|
Chris@23
|
715 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
716 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
717 chromanorm[1] += pow(*it,2);
|
Chris@23
|
718 }
|
Chris@23
|
719 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
720 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
721 chromanorm[2] += pow(*it,2);
|
Chris@23
|
722 }
|
Chris@23
|
723 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
724 break;
|
Chris@23
|
725 }
|
Chris@23
|
726 if (chromanorm[0] > 0) {
|
Chris@23
|
727 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
728 f4.values[i] /= chromanorm[0];
|
Chris@23
|
729 }
|
Chris@23
|
730 }
|
Chris@23
|
731 if (chromanorm[1] > 0) {
|
Chris@23
|
732 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
733 f5.values[i] /= chromanorm[1];
|
Chris@23
|
734 }
|
Chris@23
|
735 }
|
Chris@23
|
736 if (chromanorm[2] > 0) {
|
Chris@23
|
737 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
738 f6.values[i] /= chromanorm[2];
|
Chris@23
|
739 }
|
Chris@23
|
740 }
|
matthiasm@13
|
741
|
Chris@23
|
742 }
|
matthiasm@13
|
743
|
Chris@23
|
744 // local chord estimation
|
Chris@23
|
745 vector<float> currentChordSalience;
|
Chris@23
|
746 float tempchordvalue = 0;
|
Chris@23
|
747 float sumchordvalue = 0;
|
matthiasm@9
|
748
|
Chris@23
|
749 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
750 tempchordvalue = 0;
|
Chris@23
|
751 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
752 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
753 }
|
Chris@23
|
754 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
755 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
756 }
|
Chris@23
|
757 sumchordvalue+=tempchordvalue;
|
Chris@23
|
758 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
759 }
|
Chris@23
|
760 if (sumchordvalue > 0) {
|
Chris@23
|
761 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
762 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
763 }
|
Chris@23
|
764 } else {
|
Chris@23
|
765 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
766 }
|
Chris@23
|
767 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
768
|
Chris@23
|
769 fsOut[3].push_back(f3);
|
Chris@23
|
770 fsOut[4].push_back(f4);
|
Chris@23
|
771 fsOut[5].push_back(f5);
|
Chris@23
|
772 fsOut[6].push_back(f6);
|
Chris@23
|
773 count++;
|
Chris@23
|
774 }
|
Chris@23
|
775 cerr << "done." << endl;
|
matthiasm@13
|
776
|
matthiasm@10
|
777
|
Chris@23
|
778 /* Simple chord estimation
|
Chris@23
|
779 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
780 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
781 */
|
Chris@23
|
782 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
783 count = 0;
|
Chris@23
|
784 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
785 vector<int> chordSequence;
|
Chris@23
|
786 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
787 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
788 scoreChordogram.push_back(temp);
|
Chris@23
|
789 }
|
Chris@23
|
790 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
791 int startIndex = count + 1;
|
Chris@23
|
792 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
793
|
Chris@23
|
794 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
795
|
Chris@23
|
796 vector<int> chordCandidates;
|
Chris@23
|
797 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
798 // float currsum = 0;
|
Chris@23
|
799 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
800 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
801 // }
|
Chris@23
|
802 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
803 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
804 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
805 chordCandidates.push_back(iChord);
|
Chris@23
|
806 break;
|
Chris@23
|
807 }
|
Chris@23
|
808 }
|
Chris@23
|
809 }
|
Chris@23
|
810 chordCandidates.push_back(nChord-1);
|
Chris@23
|
811 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
812
|
Chris@23
|
813 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
814 float maxindex = 0; //... and the index thereof
|
Chris@23
|
815 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
816 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
817
|
Chris@23
|
818 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
819 // now find the max values on both sides of iWF
|
Chris@23
|
820 // left side:
|
Chris@23
|
821 float maxL = 0;
|
Chris@23
|
822 unsigned maxindL = nChord-1;
|
Chris@23
|
823 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
824 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
825 float currsum = 0;
|
Chris@23
|
826 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
827 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
828 }
|
Chris@23
|
829 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
830 if (currsum > maxL) {
|
Chris@23
|
831 maxL = currsum;
|
Chris@23
|
832 maxindL = iChord;
|
Chris@23
|
833 }
|
Chris@23
|
834 }
|
Chris@23
|
835 // right side:
|
Chris@23
|
836 float maxR = 0;
|
Chris@23
|
837 unsigned maxindR = nChord-1;
|
Chris@23
|
838 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
839 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
840 float currsum = 0;
|
Chris@23
|
841 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
842 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
843 }
|
Chris@23
|
844 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
845 if (currsum > maxR) {
|
Chris@23
|
846 maxR = currsum;
|
Chris@23
|
847 maxindR = iChord;
|
Chris@23
|
848 }
|
Chris@23
|
849 }
|
Chris@23
|
850 if (maxL+maxR > maxval) {
|
Chris@23
|
851 maxval = maxL+maxR;
|
Chris@23
|
852 maxindex = iWF;
|
Chris@23
|
853 bestchordL = maxindL;
|
Chris@23
|
854 bestchordR = maxindR;
|
Chris@23
|
855 }
|
matthiasm@3
|
856
|
Chris@23
|
857 }
|
Chris@23
|
858 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
859 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
860 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
861 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
862 }
|
Chris@23
|
863 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
864 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
865 }
|
Chris@23
|
866 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
867 count++;
|
Chris@23
|
868 }
|
Chris@23
|
869 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
870 count = 0;
|
Chris@23
|
871 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
872 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
873 float maxindex = 0; //... and the index thereof
|
Chris@23
|
874 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
875 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
876 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
877 maxindex = iChord;
|
Chris@23
|
878 // cerr << iChord << endl;
|
Chris@23
|
879 }
|
Chris@23
|
880 }
|
Chris@23
|
881 chordSequence.push_back(maxindex);
|
Chris@23
|
882 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
883 count++;
|
Chris@23
|
884 }
|
Chris@23
|
885 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
886
|
matthiasm@3
|
887
|
Chris@23
|
888 // mode filter on chordSequence
|
Chris@23
|
889 count = 0;
|
Chris@23
|
890 string oldChord = "";
|
Chris@23
|
891 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
892 Feature f6 = *it;
|
Chris@23
|
893 Feature f7; // chord estimate
|
Chris@23
|
894 f7.hasTimestamp = true;
|
Chris@23
|
895 f7.timestamp = f6.timestamp;
|
Chris@23
|
896 Feature f8; // chord estimate
|
Chris@23
|
897 f8.hasTimestamp = true;
|
Chris@23
|
898 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
899
|
Chris@23
|
900 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
901 int maxChordCount = 0;
|
Chris@23
|
902 int maxChordIndex = nChord-1;
|
Chris@23
|
903 string maxChord;
|
Chris@23
|
904 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
905 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
906 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
907 chordCount[chordSequence[i]]++;
|
Chris@23
|
908 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
909 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
910 maxChordCount++;
|
Chris@23
|
911 maxChordIndex = chordSequence[i];
|
Chris@23
|
912 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
913 }
|
Chris@23
|
914 }
|
Chris@23
|
915 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
916 // cerr << maxChordIndex << endl;
|
Chris@23
|
917 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
918 // cerr << chordchange[count] << endl;
|
Chris@23
|
919 fsOut[9].push_back(f8);
|
Chris@23
|
920 if (oldChord != maxChord) {
|
Chris@23
|
921 oldChord = maxChord;
|
matthiasm@3
|
922
|
Chris@23
|
923 // char buffer1 [50];
|
Chris@23
|
924 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
925 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
926 // } else {
|
Chris@23
|
927 // sprintf(buffer1, "N");
|
Chris@23
|
928 // }
|
Chris@23
|
929 // f7.label = buffer1;
|
Chris@23
|
930 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
931 fsOut[7].push_back(f7);
|
Chris@23
|
932 }
|
Chris@23
|
933 count++;
|
Chris@23
|
934 }
|
Chris@23
|
935 Feature f7; // last chord estimate
|
Chris@23
|
936 f7.hasTimestamp = true;
|
Chris@23
|
937 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
938 f7.label = "N";
|
Chris@23
|
939 fsOut[7].push_back(f7);
|
Chris@23
|
940 cerr << "done." << endl;
|
Chris@23
|
941 // // musicity
|
Chris@23
|
942 // count = 0;
|
Chris@23
|
943 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
944 // vector<float> musicityValue;
|
Chris@23
|
945 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
946 // Feature f4 = *it;
|
Chris@23
|
947 //
|
Chris@23
|
948 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
949 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
950 // float chromasum = 0;
|
Chris@23
|
951 // float diffsum = 0;
|
Chris@23
|
952 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
953 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
954 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
955 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
956 // }
|
Chris@23
|
957 // }
|
Chris@23
|
958 // diffsum /= chromasum;
|
Chris@23
|
959 // musicityValue.push_back(diffsum);
|
Chris@23
|
960 // count++;
|
Chris@23
|
961 // }
|
Chris@23
|
962 //
|
Chris@23
|
963 // float musicityThreshold = 0.44;
|
Chris@23
|
964 // if (m_stepSize == 4096) {
|
Chris@23
|
965 // musicityThreshold = 0.74;
|
Chris@23
|
966 // }
|
Chris@23
|
967 // if (m_stepSize == 4410) {
|
Chris@23
|
968 // musicityThreshold = 0.77;
|
Chris@23
|
969 // }
|
Chris@23
|
970 //
|
Chris@23
|
971 // count = 0;
|
Chris@23
|
972 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
973 // Feature f4 = *it;
|
Chris@23
|
974 // Feature f8; // musicity
|
Chris@23
|
975 // Feature f9; // musicity segmenter
|
Chris@23
|
976 //
|
Chris@23
|
977 // f8.hasTimestamp = true;
|
Chris@23
|
978 // f8.timestamp = f4.timestamp;
|
Chris@23
|
979 // f9.hasTimestamp = true;
|
Chris@23
|
980 // f9.timestamp = f4.timestamp;
|
Chris@23
|
981 //
|
Chris@23
|
982 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
983 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
984 // int musicityCount = 0;
|
Chris@23
|
985 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
986 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
987 // }
|
Chris@23
|
988 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
989 //
|
Chris@23
|
990 // if (isSpeech) {
|
Chris@23
|
991 // if (oldlabeltype != 2) {
|
Chris@23
|
992 // f9.label = "Speech";
|
Chris@23
|
993 // fsOut[9].push_back(f9);
|
Chris@23
|
994 // oldlabeltype = 2;
|
Chris@23
|
995 // }
|
Chris@23
|
996 // } else {
|
Chris@23
|
997 // if (oldlabeltype != 1) {
|
Chris@23
|
998 // f9.label = "Music";
|
Chris@23
|
999 // fsOut[9].push_back(f9);
|
Chris@23
|
1000 // oldlabeltype = 1;
|
Chris@23
|
1001 // }
|
Chris@23
|
1002 // }
|
Chris@23
|
1003 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1004 // fsOut[8].push_back(f8);
|
Chris@23
|
1005 // count++;
|
Chris@23
|
1006 // }
|
Chris@23
|
1007 return fsOut;
|
matthiasm@0
|
1008
|
matthiasm@0
|
1009 }
|
matthiasm@0
|
1010
|
Chris@35
|
1011 #endif
|