Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Compile-time trace switch: when true, the methods in this file announce
// their entry on cerr.
static const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
Chris@35
|
33 m_logSpectrum(0),
|
Chris@23
|
34 m_blockSize(0),
|
Chris@23
|
35 m_stepSize(0),
|
Chris@23
|
36 m_lengthOfNoteIndex(0),
|
mail@80
|
37 m_meanTunings(0),
|
mail@80
|
38 m_localTunings(0),
|
mail@41
|
39 m_whitening(1.0),
|
Chris@23
|
40 m_preset(0.0),
|
Chris@23
|
41 m_localTuning(0),
|
Chris@23
|
42 m_kernelValue(0),
|
Chris@23
|
43 m_kernelFftIndex(0),
|
Chris@23
|
44 m_kernelNoteIndex(0),
|
Chris@23
|
45 m_dict(0),
|
mail@60
|
46 m_tuneLocal(0),
|
Chris@23
|
47 m_doNormalizeChroma(0),
|
mail@60
|
48 m_rollon(0),
|
matthiasm@42
|
49 m_s(0.7),
|
matthiasm@50
|
50 m_useNNLS(1),
|
mail@80
|
51 m_useHMM(1),
|
mail@80
|
52 sinvalues(0),
|
mail@80
|
53 cosvalues(0)
|
matthiasm@0
|
54 {
|
Chris@35
|
55 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
56
|
Chris@23
|
57 // make the *note* dictionary matrix
|
Chris@23
|
58 m_dict = new float[nNote * 84];
|
Chris@23
|
59 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
60 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@0
|
61 }
|
matthiasm@0
|
62
|
matthiasm@0
|
63
|
Chris@35
|
64 NNLSBase::~NNLSBase()
|
matthiasm@0
|
65 {
|
Chris@35
|
66 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
67 delete [] m_dict;
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70 string
|
Chris@35
|
71 NNLSBase::getMaker() const
|
matthiasm@0
|
72 {
|
Chris@23
|
73 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
74 // Your name here
|
matthiasm@0
|
75 return "Matthias Mauch";
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 int
|
Chris@35
|
79 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
80 {
|
Chris@23
|
81 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
82 // Increment this each time you release a version that behaves
|
matthiasm@0
|
83 // differently from the previous one
|
matthiasm@0
|
84 return 1;
|
matthiasm@0
|
85 }
|
matthiasm@0
|
86
|
matthiasm@0
|
87 string
|
Chris@35
|
88 NNLSBase::getCopyright() const
|
matthiasm@0
|
89 {
|
Chris@23
|
90 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
91 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
92 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
93 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
94 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
95 return "GPL";
|
matthiasm@0
|
96 }
|
matthiasm@0
|
97
|
Chris@35
|
98 NNLSBase::InputDomain
|
Chris@35
|
99 NNLSBase::getInputDomain() const
|
matthiasm@0
|
100 {
|
Chris@23
|
101 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
102 return FrequencyDomain;
|
matthiasm@0
|
103 }
|
matthiasm@0
|
104
|
matthiasm@0
|
105 size_t
|
Chris@35
|
106 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
107 {
|
Chris@23
|
108 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
109 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 size_t
|
Chris@35
|
113 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
114 {
|
Chris@23
|
115 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
116 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
117 // means the same as the block size for TimeDomain
|
Chris@23
|
118 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
119 }
|
matthiasm@0
|
120
|
matthiasm@0
|
121 size_t
|
Chris@35
|
122 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
123 {
|
Chris@23
|
124 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
125 return 1;
|
matthiasm@0
|
126 }
|
matthiasm@0
|
127
|
matthiasm@0
|
128 size_t
|
Chris@35
|
129 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
130 {
|
Chris@23
|
131 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
132 return 1;
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134
|
Chris@35
|
135 NNLSBase::ParameterList
|
Chris@35
|
136 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
137 {
|
Chris@23
|
138 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
139 ParameterList list;
|
matthiasm@0
|
140
|
matthiasm@42
|
141 ParameterDescriptor d;
|
matthiasm@42
|
142 d.identifier = "useNNLS";
|
matthiasm@42
|
143 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
144 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
145 d.unit = "";
|
matthiasm@42
|
146 d.minValue = 0.0;
|
matthiasm@42
|
147 d.maxValue = 1.0;
|
matthiasm@42
|
148 d.defaultValue = 1.0;
|
matthiasm@42
|
149 d.isQuantized = true;
|
matthiasm@42
|
150 d.quantizeStep = 1.0;
|
matthiasm@42
|
151 list.push_back(d);
|
matthiasm@42
|
152
|
mail@41
|
153 ParameterDescriptor d0;
|
mail@41
|
154 d0.identifier = "rollon";
|
mail@41
|
155 d0.name = "spectral roll-on";
|
matthiasm@58
|
156 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
157 d0.unit = "%";
|
mail@41
|
158 d0.minValue = 0;
|
matthiasm@59
|
159 d0.maxValue = 5;
|
mail@41
|
160 d0.defaultValue = 0;
|
matthiasm@48
|
161 d0.isQuantized = true;
|
matthiasm@59
|
162 d0.quantizeStep = 0.5;
|
mail@41
|
163 list.push_back(d0);
|
matthiasm@4
|
164
|
matthiasm@4
|
165 ParameterDescriptor d1;
|
matthiasm@4
|
166 d1.identifier = "tuningmode";
|
matthiasm@4
|
167 d1.name = "tuning mode";
|
matthiasm@4
|
168 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
169 d1.unit = "";
|
matthiasm@4
|
170 d1.minValue = 0;
|
matthiasm@4
|
171 d1.maxValue = 1;
|
matthiasm@4
|
172 d1.defaultValue = 0;
|
matthiasm@4
|
173 d1.isQuantized = true;
|
matthiasm@4
|
174 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
175 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
176 d1.quantizeStep = 1.0;
|
matthiasm@4
|
177 list.push_back(d1);
|
matthiasm@4
|
178
|
mail@41
|
179 ParameterDescriptor d2;
|
mail@41
|
180 d2.identifier = "whitening";
|
mail@41
|
181 d2.name = "spectral whitening";
|
mail@41
|
182 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
183 d2.unit = "";
|
mail@41
|
184 d2.isQuantized = true;
|
mail@41
|
185 d2.minValue = 0.0;
|
mail@41
|
186 d2.maxValue = 1.0;
|
mail@41
|
187 d2.defaultValue = 1.0;
|
mail@41
|
188 d2.isQuantized = false;
|
mail@41
|
189 list.push_back(d2);
|
mail@41
|
190
|
mail@41
|
191 ParameterDescriptor d3;
|
mail@41
|
192 d3.identifier = "s";
|
mail@41
|
193 d3.name = "spectral shape";
|
mail@41
|
194 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
195 d3.unit = "";
|
mail@41
|
196 d3.minValue = 0.5;
|
mail@41
|
197 d3.maxValue = 0.9;
|
mail@41
|
198 d3.defaultValue = 0.7;
|
mail@41
|
199 d3.isQuantized = false;
|
mail@41
|
200 list.push_back(d3);
|
mail@41
|
201
|
Chris@23
|
202 ParameterDescriptor d4;
|
matthiasm@12
|
203 d4.identifier = "chromanormalize";
|
matthiasm@12
|
204 d4.name = "chroma normalization";
|
matthiasm@12
|
205 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
206 d4.unit = "";
|
matthiasm@12
|
207 d4.minValue = 0;
|
matthiasm@13
|
208 d4.maxValue = 3;
|
matthiasm@12
|
209 d4.defaultValue = 0;
|
matthiasm@12
|
210 d4.isQuantized = true;
|
matthiasm@13
|
211 d4.valueNames.push_back("none");
|
matthiasm@13
|
212 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
213 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
214 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
215 d4.quantizeStep = 1.0;
|
matthiasm@12
|
216 list.push_back(d4);
|
matthiasm@4
|
217
|
matthiasm@0
|
218 return list;
|
matthiasm@0
|
219 }
|
matthiasm@0
|
220
|
matthiasm@0
|
221 float
|
Chris@35
|
222 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
223 {
|
Chris@23
|
224 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
225 if (identifier == "useNNLS") {
|
matthiasm@42
|
226 return m_useNNLS;
|
matthiasm@0
|
227 }
|
matthiasm@0
|
228
|
mail@41
|
229 if (identifier == "whitening") {
|
mail@41
|
230 return m_whitening;
|
mail@41
|
231 }
|
mail@41
|
232
|
mail@41
|
233 if (identifier == "s") {
|
mail@41
|
234 return m_s;
|
matthiasm@0
|
235 }
|
matthiasm@17
|
236
|
Chris@23
|
237 if (identifier == "rollon") {
|
matthiasm@17
|
238 return m_rollon;
|
matthiasm@17
|
239 }
|
matthiasm@0
|
240
|
matthiasm@0
|
241 if (identifier == "tuningmode") {
|
matthiasm@0
|
242 if (m_tuneLocal) {
|
matthiasm@0
|
243 return 1.0;
|
matthiasm@0
|
244 } else {
|
matthiasm@0
|
245 return 0.0;
|
matthiasm@0
|
246 }
|
matthiasm@0
|
247 }
|
Chris@23
|
248 if (identifier == "preset") {
|
Chris@23
|
249 return m_preset;
|
matthiasm@3
|
250 }
|
Chris@23
|
251 if (identifier == "chromanormalize") {
|
Chris@23
|
252 return m_doNormalizeChroma;
|
matthiasm@12
|
253 }
|
matthiasm@50
|
254
|
matthiasm@50
|
255 if (identifier == "useHMM") {
|
matthiasm@50
|
256 return m_useHMM;
|
matthiasm@50
|
257 }
|
matthiasm@50
|
258
|
matthiasm@0
|
259 return 0;
|
matthiasm@0
|
260
|
matthiasm@0
|
261 }
|
matthiasm@0
|
262
|
matthiasm@0
|
263 void
|
Chris@35
|
264 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
265 {
|
Chris@23
|
266 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
267 if (identifier == "useNNLS") {
|
matthiasm@42
|
268 m_useNNLS = (int) value;
|
matthiasm@0
|
269 }
|
matthiasm@0
|
270
|
mail@41
|
271 if (identifier == "whitening") {
|
mail@41
|
272 m_whitening = value;
|
matthiasm@0
|
273 }
|
matthiasm@0
|
274
|
mail@41
|
275 if (identifier == "s") {
|
mail@41
|
276 m_s = value;
|
mail@41
|
277 }
|
mail@41
|
278
|
matthiasm@50
|
279 if (identifier == "useHMM") {
|
matthiasm@50
|
280 m_useHMM = value;
|
matthiasm@50
|
281 }
|
matthiasm@50
|
282
|
matthiasm@0
|
283 if (identifier == "tuningmode") {
|
mail@60
|
284 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
285 m_tuneLocal = value;
|
matthiasm@0
|
286 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
287 }
|
matthiasm@42
|
288 // if (identifier == "preset") {
|
matthiasm@42
|
289 // m_preset = value;
|
matthiasm@42
|
290 // if (m_preset == 0.0) {
|
matthiasm@42
|
291 // m_tuneLocal = false;
|
matthiasm@42
|
292 // m_whitening = 1.0;
|
matthiasm@42
|
293 // m_dictID = 0.0;
|
matthiasm@42
|
294 // }
|
matthiasm@42
|
295 // if (m_preset == 1.0) {
|
matthiasm@42
|
296 // m_tuneLocal = false;
|
matthiasm@42
|
297 // m_whitening = 1.0;
|
matthiasm@42
|
298 // m_dictID = 1.0;
|
matthiasm@42
|
299 // }
|
matthiasm@42
|
300 // if (m_preset == 2.0) {
|
matthiasm@42
|
301 // m_tuneLocal = false;
|
matthiasm@42
|
302 // m_whitening = 0.7;
|
matthiasm@42
|
303 // m_dictID = 0.0;
|
matthiasm@42
|
304 // }
|
matthiasm@42
|
305 // }
|
Chris@23
|
306 if (identifier == "chromanormalize") {
|
Chris@23
|
307 m_doNormalizeChroma = value;
|
Chris@23
|
308 }
|
matthiasm@17
|
309
|
Chris@23
|
310 if (identifier == "rollon") {
|
Chris@23
|
311 m_rollon = value;
|
Chris@23
|
312 }
|
matthiasm@0
|
313 }
|
matthiasm@0
|
314
|
Chris@35
|
315 NNLSBase::ProgramList
|
Chris@35
|
316 NNLSBase::getPrograms() const
|
matthiasm@0
|
317 {
|
Chris@23
|
318 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
319 ProgramList list;
|
matthiasm@0
|
320
|
matthiasm@0
|
321 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
322 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
323
|
matthiasm@0
|
324 return list;
|
matthiasm@0
|
325 }
|
matthiasm@0
|
326
|
matthiasm@0
|
327 string
|
Chris@35
|
328 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
329 {
|
Chris@23
|
330 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
331 return ""; // no programs
|
matthiasm@0
|
332 }
|
matthiasm@0
|
333
|
matthiasm@0
|
334 void
|
Chris@35
|
335 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
336 {
|
Chris@23
|
337 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
338 }
|
matthiasm@0
|
339
|
matthiasm@0
|
340
|
matthiasm@0
|
341 bool
|
Chris@35
|
342 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
343 {
|
Chris@23
|
344 if (debug_on) {
|
Chris@23
|
345 cerr << "--> initialise";
|
Chris@23
|
346 }
|
matthiasm@1
|
347
|
mail@80
|
348 // make things for tuning estimation
|
mail@80
|
349 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
350 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
351 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
352 }
|
mail@80
|
353
|
mail@80
|
354
|
mail@80
|
355 // make hamming window of length 1/2 octave
|
mail@76
|
356 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
357 float hamwinsum = 0;
|
mail@76
|
358 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
359 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
360 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
361 }
|
mail@77
|
362 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
363
|
mail@80
|
364
|
mail@80
|
365 // initialise the tuning
|
mail@80
|
366 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
367 m_meanTunings.push_back(0);
|
mail@80
|
368 m_localTunings.push_back(0);
|
mail@80
|
369 }
|
mail@76
|
370
|
matthiasm@0
|
371 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
372 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
373 m_blockSize = blockSize;
|
matthiasm@0
|
374 m_stepSize = stepSize;
|
Chris@35
|
375 m_frameCount = 0;
|
mail@77
|
376 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
377 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
378 float *tempkernel;
|
matthiasm@1
|
379
|
Chris@23
|
380 tempkernel = new float[tempn];
|
matthiasm@1
|
381
|
Chris@23
|
382 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
383 m_kernelValue.clear();
|
Chris@23
|
384 m_kernelFftIndex.clear();
|
Chris@23
|
385 m_kernelNoteIndex.clear();
|
Chris@23
|
386 int countNonzero = 0;
|
Chris@23
|
387 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
388 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
389 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
390 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
391 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
392 countNonzero++;
|
Chris@23
|
393 }
|
Chris@23
|
394 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
395 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
396 }
|
Chris@23
|
397 }
|
Chris@23
|
398 }
|
Chris@23
|
399 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
400 delete [] tempkernel;
|
Chris@35
|
401 /*
|
Chris@23
|
402 ofstream myfile;
|
Chris@23
|
403 myfile.open ("matrix.txt");
|
matthiasm@3
|
404 // myfile << "Writing this to a file.\n";
|
Chris@23
|
405 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
406 myfile << m_dict[i] << endl;
|
Chris@23
|
407 }
|
matthiasm@3
|
408 myfile.close();
|
Chris@35
|
409 */
|
matthiasm@0
|
410 return true;
|
matthiasm@0
|
411 }
|
matthiasm@0
|
412
|
matthiasm@0
|
413 void
|
Chris@35
|
414 NNLSBase::reset()
|
matthiasm@0
|
415 {
|
Chris@23
|
416 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
417
|
matthiasm@0
|
418 // Clear buffers, reset stored values, etc
|
Chris@35
|
419 m_frameCount = 0;
|
matthiasm@42
|
420 // m_dictID = 0;
|
Chris@35
|
421 m_logSpectrum.clear();
|
mail@80
|
422 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
423 m_meanTunings[iBPS] = 0;
|
mail@80
|
424 m_localTunings[iBPS] = 0;
|
mail@80
|
425 }
|
Chris@23
|
426 m_localTuning.clear();
|
matthiasm@0
|
427 }
|
matthiasm@0
|
428
|
Chris@35
|
429 void
|
Chris@35
|
430 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
431 {
|
Chris@35
|
432 m_frameCount++;
|
Chris@23
|
433 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
434
|
Chris@23
|
435 const float *fbuf = inputBuffers[0];
|
Chris@23
|
436 float energysum = 0;
|
Chris@23
|
437 // make magnitude
|
Chris@23
|
438 float maxmag = -10000;
|
Chris@23
|
439 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
440 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
441 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
442 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
443 if (m_rollon > 0) {
|
Chris@23
|
444 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
445 }
|
Chris@23
|
446 }
|
matthiasm@14
|
447
|
Chris@23
|
448 float cumenergy = 0;
|
Chris@23
|
449 if (m_rollon > 0) {
|
Chris@23
|
450 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
451 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
452 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
453 else break;
|
Chris@23
|
454 }
|
Chris@23
|
455 }
|
matthiasm@17
|
456
|
Chris@23
|
457 if (maxmag < 2) {
|
Chris@23
|
458 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
459 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
460 magnitude[iBin] = 0;
|
Chris@23
|
461 }
|
Chris@23
|
462 }
|
matthiasm@4
|
463
|
Chris@23
|
464 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
465 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
466 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
467 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
468 }
|
Chris@23
|
469 int binCount = 0;
|
Chris@23
|
470 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
471 // cerr << ".";
|
Chris@23
|
472 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
473 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
474 binCount++;
|
Chris@23
|
475 }
|
Chris@23
|
476 // cerr << nm[20];
|
Chris@23
|
477 // cerr << endl;
|
matthiasm@0
|
478
|
matthiasm@0
|
479
|
Chris@35
|
480 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
481 // update means of complex tuning variables
|
mail@80
|
482 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
483
|
mail@80
|
484 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
485 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
486 float ratioOld = 0.997;
|
mail@80
|
487 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
488 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
489 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
490 }
|
matthiasm@0
|
491 }
|
matthiasm@0
|
492 // if (m_tuneLocal) {
|
Chris@23
|
493 // local tuning
|
mail@80
|
494 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
495 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
496
|
mail@80
|
497 float localTuningImag = 0;
|
mail@80
|
498 float localTuningReal = 0;
|
mail@80
|
499 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
500 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
501 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
502 }
|
mail@80
|
503
|
Chris@23
|
504 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
505 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
506
|
Chris@23
|
507 Feature f1; // logfreqspec
|
Chris@23
|
508 f1.hasTimestamp = true;
|
matthiasm@0
|
509 f1.timestamp = timestamp;
|
Chris@23
|
510 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
511 f1.values.push_back(nm[iNote]);
|
Chris@23
|
512 }
|
matthiasm@0
|
513
|
matthiasm@0
|
514 // deletes
|
matthiasm@0
|
515 delete[] magnitude;
|
matthiasm@0
|
516 delete[] nm;
|
matthiasm@0
|
517
|
Chris@35
|
518 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
519 }
|
matthiasm@0
|
520
|
Chris@35
|
521
|
Chris@35
|
522 #ifdef NOT_DEFINED
|
Chris@35
|
523
|
Chris@35
|
524 NNLSBase::FeatureSet
|
Chris@35
|
525 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
526 {
|
mail@81
|
527 // if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
mail@81
|
528 FeatureSet fsOut;
|
mail@81
|
529 // if (m_logSpectrum.size() == 0) return fsOut;
|
mail@81
|
530 // int nChord = m_chordnames.size();
|
mail@81
|
531 // //
|
mail@81
|
532 // /** Calculate Tuning
|
mail@81
|
533 // calculate tuning from (using the angle of the complex number defined by the
|
mail@81
|
534 // cumulative mean real and imag values)
|
mail@81
|
535 // **/
|
mail@81
|
536 // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
|
mail@81
|
537 // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
|
mail@81
|
538 // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
mail@81
|
539 // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
mail@81
|
540 // int intShift = floor(normalisedtuning * 3);
|
mail@81
|
541 // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
542 //
|
mail@81
|
543 // char buffer0 [50];
|
mail@81
|
544 //
|
mail@81
|
545 // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
mail@81
|
546 //
|
mail@81
|
547 // // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
mail@81
|
548 //
|
mail@81
|
549 // // push tuning to FeatureSet fsOut
|
mail@81
|
550 // Feature f0; // tuning
|
mail@81
|
551 // f0.hasTimestamp = true;
|
mail@81
|
552 // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
mail@81
|
553 // f0.label = buffer0;
|
mail@81
|
554 // fsOut[0].push_back(f0);
|
mail@81
|
555 //
|
mail@81
|
556 // /** Tune Log-Frequency Spectrogram
|
mail@81
|
557 // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
mail@81
|
558 // perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
mail@81
|
559 // **/
|
mail@81
|
560 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
mail@81
|
561 //
|
mail@81
|
562 // float tempValue = 0;
|
mail@81
|
563 // float dbThreshold = 0; // relative to the background spectrum
|
mail@81
|
564 // float thresh = pow(10,dbThreshold/20);
|
mail@81
|
565 // // cerr << "tune local ? " << m_tuneLocal << endl;
|
mail@81
|
566 // int count = 0;
|
mail@81
|
567 //
|
mail@81
|
568 // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
mail@81
|
569 // Feature f1 = *i;
|
mail@81
|
570 // Feature f2; // tuned log-frequency spectrum
|
mail@81
|
571 // f2.hasTimestamp = true;
|
mail@81
|
572 // f2.timestamp = f1.timestamp;
|
mail@81
|
573 // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
mail@81
|
574 //
|
mail@81
|
575 // if (m_tuneLocal == 1.0) {
|
mail@81
|
576 // intShift = floor(m_localTuning[count] * 3);
|
mail@81
|
577 // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
578 // }
|
mail@81
|
579 //
|
mail@81
|
580 // // cerr << intShift << " " << floatShift << endl;
|
mail@81
|
581 //
|
mail@81
|
582 // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@81
|
583 // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
mail@81
|
584 // f2.values.push_back(tempValue);
|
mail@81
|
585 // }
|
mail@81
|
586 //
|
mail@81
|
587 // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
mail@81
|
588 // vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
mail@81
|
589 // vector<float> runningstd;
|
mail@81
|
590 // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
mail@81
|
591 // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
mail@81
|
592 // }
|
mail@81
|
593 // runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@81
|
594 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
595 // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
mail@81
|
596 // if (runningstd[i] > 0) {
|
mail@81
|
597 // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@81
|
598 // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
599 // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@81
|
600 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
601 // }
|
mail@81
|
602 // if (f2.values[i] < 0) {
|
mail@81
|
603 // cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
mail@81
|
604 // }
|
mail@81
|
605 // }
|
mail@81
|
606 // fsOut[2].push_back(f2);
|
mail@81
|
607 // count++;
|
mail@81
|
608 // }
|
mail@81
|
609 // cerr << "done." << endl;
|
mail@81
|
610 //
|
mail@81
|
611 // /** Semitone spectrum and chromagrams
|
mail@81
|
612 // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
mail@81
|
613 // is inferred using a non-negative least squares algorithm.
|
mail@81
|
614 // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
mail@81
|
615 // bass and treble stacked onto each other).
|
mail@81
|
616 // **/
|
mail@81
|
617 // if (m_useNNLS == 0) {
|
mail@81
|
618 // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
mail@81
|
619 // } else {
|
mail@81
|
620 // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
mail@81
|
621 // }
|
Chris@23
|
622 //
|
mail@81
|
623 //
|
mail@81
|
624 // vector<vector<float> > chordogram;
|
mail@81
|
625 // vector<vector<int> > scoreChordogram;
|
mail@81
|
626 // vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
mail@81
|
627 // vector<float> oldchroma = vector<float>(12,0);
|
mail@81
|
628 // vector<float> oldbasschroma = vector<float>(12,0);
|
mail@81
|
629 // count = 0;
|
mail@81
|
630 //
|
mail@81
|
631 // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
mail@81
|
632 // Feature f2 = *it; // logfreq spectrum
|
mail@81
|
633 // Feature f3; // semitone spectrum
|
mail@81
|
634 // Feature f4; // treble chromagram
|
mail@81
|
635 // Feature f5; // bass chromagram
|
mail@81
|
636 // Feature f6; // treble and bass chromagram
|
mail@81
|
637 //
|
mail@81
|
638 // f3.hasTimestamp = true;
|
mail@81
|
639 // f3.timestamp = f2.timestamp;
|
mail@81
|
640 //
|
mail@81
|
641 // f4.hasTimestamp = true;
|
mail@81
|
642 // f4.timestamp = f2.timestamp;
|
mail@81
|
643 //
|
mail@81
|
644 // f5.hasTimestamp = true;
|
mail@81
|
645 // f5.timestamp = f2.timestamp;
|
mail@81
|
646 //
|
mail@81
|
647 // f6.hasTimestamp = true;
|
mail@81
|
648 // f6.timestamp = f2.timestamp;
|
mail@81
|
649 //
|
mail@81
|
650 // float b[nNote];
|
mail@81
|
651 //
|
mail@81
|
652 // bool some_b_greater_zero = false;
|
mail@81
|
653 // float sumb = 0;
|
mail@81
|
654 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
655 // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
mail@81
|
656 // b[i] = f2.values[i];
|
mail@81
|
657 // sumb += b[i];
|
mail@81
|
658 // if (b[i] > 0) {
|
mail@81
|
659 // some_b_greater_zero = true;
|
mail@81
|
660 // }
|
mail@81
|
661 // }
|
mail@81
|
662 //
|
mail@81
|
663 // // here's where the non-negative least squares algorithm calculates the note activation x
|
mail@81
|
664 //
|
mail@81
|
665 // vector<float> chroma = vector<float>(12, 0);
|
mail@81
|
666 // vector<float> basschroma = vector<float>(12, 0);
|
mail@81
|
667 // float currval;
|
mail@81
|
668 // unsigned iSemitone = 0;
|
mail@81
|
669 //
|
mail@81
|
670 // if (some_b_greater_zero) {
|
mail@81
|
671 // if (m_useNNLS == 0) {
|
mail@81
|
672 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
673 // currval = 0;
|
mail@81
|
674 // currval += b[iNote + 1 + -1] * 0.5;
|
mail@81
|
675 // currval += b[iNote + 1 + 0] * 1.0;
|
mail@81
|
676 // currval += b[iNote + 1 + 1] * 0.5;
|
mail@81
|
677 // f3.values.push_back(currval);
|
mail@81
|
678 // chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
mail@81
|
679 // basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
mail@81
|
680 // iSemitone++;
|
mail@81
|
681 // }
|
mail@81
|
682 //
|
mail@81
|
683 // } else {
|
mail@81
|
684 // float x[84+1000];
|
mail@81
|
685 // for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
mail@81
|
686 // vector<int> signifIndex;
|
mail@81
|
687 // int index=0;
|
mail@81
|
688 // sumb /= 84.0;
|
mail@81
|
689 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
690 // float currval = 0;
|
mail@81
|
691 // currval += b[iNote + 1 + -1];
|
mail@81
|
692 // currval += b[iNote + 1 + 0];
|
mail@81
|
693 // currval += b[iNote + 1 + 1];
|
mail@81
|
694 // if (currval > 0) signifIndex.push_back(index);
|
mail@81
|
695 // f3.values.push_back(0); // fill the values, change later
|
mail@81
|
696 // index++;
|
mail@81
|
697 // }
|
mail@81
|
698 // float rnorm;
|
mail@81
|
699 // float w[84+1000];
|
mail@81
|
700 // float zz[84+1000];
|
mail@81
|
701 // int indx[84+1000];
|
mail@81
|
702 // int mode;
|
mail@81
|
703 // int dictsize = nNote*signifIndex.size();
|
mail@81
|
704 // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
mail@81
|
705 // float *curr_dict = new float[dictsize];
|
mail@81
|
706 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
707 // for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@81
|
708 // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
mail@81
|
709 // }
|
mail@81
|
710 // }
|
mail@81
|
711 // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
mail@81
|
712 // delete [] curr_dict;
|
mail@81
|
713 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
714 // f3.values[signifIndex[iNote]] = x[iNote];
|
mail@81
|
715 // // cerr << mode << endl;
|
mail@81
|
716 // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
mail@81
|
717 // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
mail@81
|
718 // }
|
mail@81
|
719 // }
|
mail@81
|
720 // }
|
mail@81
|
721 //
|
mail@81
|
722 //
|
mail@81
|
723 //
|
mail@81
|
724 //
|
mail@81
|
725 // f4.values = chroma;
|
mail@81
|
726 // f5.values = basschroma;
|
mail@81
|
727 // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack both chromas
|
mail@81
|
728 // f6.values = chroma;
|
mail@81
|
729 //
|
mail@81
|
730 // if (m_doNormalizeChroma > 0) {
|
mail@81
|
731 // vector<float> chromanorm = vector<float>(3,0);
|
mail@81
|
732 // switch (int(m_doNormalizeChroma)) {
|
mail@81
|
733 // case 0: // should never end up here
|
mail@81
|
734 // break;
|
mail@81
|
735 // case 1:
|
mail@81
|
736 // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
mail@81
|
737 // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
mail@81
|
738 // chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
mail@81
|
739 // break;
|
mail@81
|
740 // case 2:
|
mail@81
|
741 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
742 // chromanorm[0] += *it;
|
mail@81
|
743 // }
|
mail@81
|
744 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
745 // chromanorm[1] += *it;
|
mail@81
|
746 // }
|
mail@81
|
747 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
748 // chromanorm[2] += *it;
|
mail@81
|
749 // }
|
mail@81
|
750 // break;
|
mail@81
|
751 // case 3:
|
mail@81
|
752 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
753 // chromanorm[0] += pow(*it,2);
|
mail@81
|
754 // }
|
mail@81
|
755 // chromanorm[0] = sqrt(chromanorm[0]);
|
mail@81
|
756 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
757 // chromanorm[1] += pow(*it,2);
|
mail@81
|
758 // }
|
mail@81
|
759 // chromanorm[1] = sqrt(chromanorm[1]);
|
mail@81
|
760 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
761 // chromanorm[2] += pow(*it,2);
|
mail@81
|
762 // }
|
mail@81
|
763 // chromanorm[2] = sqrt(chromanorm[2]);
|
mail@81
|
764 // break;
|
mail@81
|
765 // }
|
mail@81
|
766 // if (chromanorm[0] > 0) {
|
mail@81
|
767 // for (int i = 0; i < f4.values.size(); i++) {
|
mail@81
|
768 // f4.values[i] /= chromanorm[0];
|
mail@81
|
769 // }
|
mail@81
|
770 // }
|
mail@81
|
771 // if (chromanorm[1] > 0) {
|
mail@81
|
772 // for (int i = 0; i < f5.values.size(); i++) {
|
mail@81
|
773 // f5.values[i] /= chromanorm[1];
|
mail@81
|
774 // }
|
mail@81
|
775 // }
|
mail@81
|
776 // if (chromanorm[2] > 0) {
|
mail@81
|
777 // for (int i = 0; i < f6.values.size(); i++) {
|
mail@81
|
778 // f6.values[i] /= chromanorm[2];
|
mail@81
|
779 // }
|
mail@81
|
780 // }
|
mail@81
|
781 //
|
mail@81
|
782 // }
|
mail@81
|
783 //
|
mail@81
|
784 // // local chord estimation
|
mail@81
|
785 // vector<float> currentChordSalience;
|
mail@81
|
786 // float tempchordvalue = 0;
|
mail@81
|
787 // float sumchordvalue = 0;
|
mail@81
|
788 //
|
mail@81
|
789 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
790 // tempchordvalue = 0;
|
mail@81
|
791 // for (int iBin = 0; iBin < 12; iBin++) {
|
mail@81
|
792 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
793 // }
|
mail@81
|
794 // for (int iBin = 12; iBin < 24; iBin++) {
|
mail@81
|
795 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
796 // }
|
mail@81
|
797 // sumchordvalue+=tempchordvalue;
|
mail@81
|
798 // currentChordSalience.push_back(tempchordvalue);
|
mail@81
|
799 // }
|
mail@81
|
800 // if (sumchordvalue > 0) {
|
mail@81
|
801 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
802 // currentChordSalience[iChord] /= sumchordvalue;
|
mail@81
|
803 // }
|
mail@81
|
804 // } else {
|
mail@81
|
805 // currentChordSalience[nChord-1] = 1.0;
|
mail@81
|
806 // }
|
mail@81
|
807 // chordogram.push_back(currentChordSalience);
|
mail@81
|
808 //
|
mail@81
|
809 // fsOut[3].push_back(f3);
|
mail@81
|
810 // fsOut[4].push_back(f4);
|
mail@81
|
811 // fsOut[5].push_back(f5);
|
mail@81
|
812 // fsOut[6].push_back(f6);
|
mail@81
|
813 // count++;
|
mail@81
|
814 // }
|
mail@81
|
815 // cerr << "done." << endl;
|
mail@81
|
816 //
|
mail@81
|
817 //
|
mail@81
|
818 // /* Simple chord estimation
|
mail@81
|
819 // I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
mail@81
|
820 // take the maximum. Very simple, don't do this at home...
|
mail@81
|
821 // */
|
mail@81
|
822 // cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
mail@81
|
823 // count = 0;
|
mail@81
|
824 // int halfwindowlength = m_inputSampleRate / m_stepSize;
|
mail@81
|
825 // vector<int> chordSequence;
|
mail@81
|
826 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
mail@81
|
827 // vector<int> temp = vector<int>(nChord,0);
|
mail@81
|
828 // scoreChordogram.push_back(temp);
|
mail@81
|
829 // }
|
mail@81
|
830 // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
mail@81
|
831 // int startIndex = count + 1;
|
mail@81
|
832 // int endIndex = count + 2 * halfwindowlength;
|
mail@81
|
833 //
|
mail@81
|
834 // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
mail@81
|
835 //
|
mail@81
|
836 // vector<int> chordCandidates;
|
mail@81
|
837 // for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
mail@81
|
838 // // float currsum = 0;
|
mail@81
|
839 // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
840 // // currsum += chordogram[iFrame][iChord];
|
mail@81
|
841 // // }
|
mail@81
|
842 // // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
mail@81
|
843 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
844 // if (chordogram[iFrame][iChord] > chordThreshold) {
|
mail@81
|
845 // chordCandidates.push_back(iChord);
|
mail@81
|
846 // break;
|
mail@81
|
847 // }
|
mail@81
|
848 // }
|
mail@81
|
849 // }
|
mail@81
|
850 // chordCandidates.push_back(nChord-1);
|
mail@81
|
851 // // cerr << chordCandidates.size() << endl;
|
mail@81
|
852 //
|
mail@81
|
853 // float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
mail@81
|
854 // float maxindex = 0; //... and the index thereof
|
mail@81
|
855 // unsigned bestchordL = nChord-1; // index of the best "left" chord
|
mail@81
|
856 // unsigned bestchordR = nChord-1; // index of the best "right" chord
|
mail@81
|
857 //
|
mail@81
|
858 // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
mail@81
|
859 // // now find the max values on both sides of iWF
|
mail@81
|
860 // // left side:
|
mail@81
|
861 // float maxL = 0;
|
mail@81
|
862 // unsigned maxindL = nChord-1;
|
mail@81
|
863 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
864 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
865 // float currsum = 0;
|
mail@81
|
866 // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
mail@81
|
867 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
868 // }
|
mail@81
|
869 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
870 // if (currsum > maxL) {
|
mail@81
|
871 // maxL = currsum;
|
mail@81
|
872 // maxindL = iChord;
|
mail@81
|
873 // }
|
mail@81
|
874 // }
|
mail@81
|
875 // // right side:
|
mail@81
|
876 // float maxR = 0;
|
mail@81
|
877 // unsigned maxindR = nChord-1;
|
mail@81
|
878 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
879 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
880 // float currsum = 0;
|
mail@81
|
881 // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
882 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
883 // }
|
mail@81
|
884 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
885 // if (currsum > maxR) {
|
mail@81
|
886 // maxR = currsum;
|
mail@81
|
887 // maxindR = iChord;
|
mail@81
|
888 // }
|
mail@81
|
889 // }
|
mail@81
|
890 // if (maxL+maxR > maxval) {
|
mail@81
|
891 // maxval = maxL+maxR;
|
mail@81
|
892 // maxindex = iWF;
|
mail@81
|
893 // bestchordL = maxindL;
|
mail@81
|
894 // bestchordR = maxindR;
|
mail@81
|
895 // }
|
mail@81
|
896 //
|
mail@81
|
897 // }
|
mail@81
|
898 // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
mail@81
|
899 // // add a score to every chord-frame-point that was part of a maximum
|
mail@81
|
900 // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
mail@81
|
901 // scoreChordogram[iFrame+count][bestchordL]++;
|
mail@81
|
902 // }
|
mail@81
|
903 // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
904 // scoreChordogram[iFrame+count][bestchordR]++;
|
mail@81
|
905 // }
|
mail@81
|
906 // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
mail@81
|
907 // count++;
|
mail@81
|
908 // }
|
mail@81
|
909 // // cerr << "******* agent finished *******" << endl;
|
mail@81
|
910 // count = 0;
|
mail@81
|
911 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
912 // float maxval = 0; // will be the value of the most salient chord in this frame
|
mail@81
|
913 // float maxindex = 0; //... and the index thereof
|
mail@81
|
914 // for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
915 // if (scoreChordogram[count][iChord] > maxval) {
|
mail@81
|
916 // maxval = scoreChordogram[count][iChord];
|
mail@81
|
917 // maxindex = iChord;
|
mail@81
|
918 // // cerr << iChord << endl;
|
mail@81
|
919 // }
|
mail@81
|
920 // }
|
mail@81
|
921 // chordSequence.push_back(maxindex);
|
mail@81
|
922 // // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
mail@81
|
923 // count++;
|
mail@81
|
924 // }
|
mail@81
|
925 // // cerr << "******* mode filter done *******" << endl;
|
mail@81
|
926 //
|
mail@81
|
927 //
|
mail@81
|
928 // // mode filter on chordSequence
|
mail@81
|
929 // count = 0;
|
mail@81
|
930 // string oldChord = "";
|
mail@81
|
931 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
932 // Feature f6 = *it;
|
mail@81
|
933 // Feature f7; // chord estimate
|
mail@81
|
934 // f7.hasTimestamp = true;
|
mail@81
|
935 // f7.timestamp = f6.timestamp;
|
mail@81
|
936 // Feature f8; // chord change value
|
mail@81
|
937 // f8.hasTimestamp = true;
|
mail@81
|
938 // f8.timestamp = f6.timestamp;
|
mail@81
|
939 //
|
mail@81
|
940 // vector<int> chordCount = vector<int>(nChord,0);
|
mail@81
|
941 // int maxChordCount = 0;
|
mail@81
|
942 // int maxChordIndex = nChord-1;
|
mail@81
|
943 // string maxChord;
|
mail@81
|
944 // int startIndex = max(count - halfwindowlength/2,0);
|
mail@81
|
945 // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
mail@81
|
946 // for (int i = startIndex; i < endIndex; i++) {
|
mail@81
|
947 // chordCount[chordSequence[i]]++;
|
mail@81
|
948 // if (chordCount[chordSequence[i]] > maxChordCount) {
|
mail@81
|
949 // // cerr << "start index " << startIndex << endl;
|
mail@81
|
950 // maxChordCount++;
|
mail@81
|
951 // maxChordIndex = chordSequence[i];
|
mail@81
|
952 // maxChord = m_chordnames[maxChordIndex];
|
mail@81
|
953 // }
|
mail@81
|
954 // }
|
mail@81
|
955 // // chordSequence[count] = maxChordIndex;
|
mail@81
|
956 // // cerr << maxChordIndex << endl;
|
mail@81
|
957 // f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
mail@81
|
958 // // cerr << chordchange[count] << endl;
|
mail@81
|
959 // fsOut[9].push_back(f8);
|
mail@81
|
960 // if (oldChord != maxChord) {
|
mail@81
|
961 // oldChord = maxChord;
|
mail@81
|
962 //
|
mail@81
|
963 // // char buffer1 [50];
|
mail@81
|
964 // // if (maxChordIndex < nChord - 1) {
|
mail@81
|
965 // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
mail@81
|
966 // // } else {
|
mail@81
|
967 // // sprintf(buffer1, "N");
|
mail@81
|
968 // // }
|
mail@81
|
969 // // f7.label = buffer1;
|
mail@81
|
970 // f7.label = m_chordnames[maxChordIndex];
|
mail@81
|
971 // fsOut[7].push_back(f7);
|
mail@81
|
972 // }
|
mail@81
|
973 // count++;
|
mail@81
|
974 // }
|
mail@81
|
975 // Feature f7; // last chord estimate
|
mail@81
|
976 // f7.hasTimestamp = true;
|
mail@81
|
977 // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
mail@81
|
978 // f7.label = "N";
|
mail@81
|
979 // fsOut[7].push_back(f7);
|
mail@81
|
980 // cerr << "done." << endl;
|
mail@81
|
981 // // // musicity
|
mail@81
|
982 // // count = 0;
|
mail@81
|
983 // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
mail@81
|
984 // // vector<float> musicityValue;
|
mail@81
|
985 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
986 // // Feature f4 = *it;
|
mail@81
|
987 // //
|
mail@81
|
988 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
989 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
990 // // float chromasum = 0;
|
mail@81
|
991 // // float diffsum = 0;
|
mail@81
|
992 // // for (int k = 0; k < 12; k++) {
|
mail@81
|
993 // // for (int i = startIndex + 1; i < endIndex; i++) {
|
mail@81
|
994 // // chromasum += pow(fsOut[4][i].values[k],2);
|
mail@81
|
995 // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
mail@81
|
996 // // }
|
mail@81
|
997 // // }
|
mail@81
|
998 // // diffsum /= chromasum;
|
mail@81
|
999 // // musicityValue.push_back(diffsum);
|
mail@81
|
1000 // // count++;
|
mail@81
|
1001 // // }
|
mail@81
|
1002 // //
|
mail@81
|
1003 // // float musicityThreshold = 0.44;
|
mail@81
|
1004 // // if (m_stepSize == 4096) {
|
mail@81
|
1005 // // musicityThreshold = 0.74;
|
mail@81
|
1006 // // }
|
mail@81
|
1007 // // if (m_stepSize == 4410) {
|
mail@81
|
1008 // // musicityThreshold = 0.77;
|
mail@81
|
1009 // // }
|
mail@81
|
1010 // //
|
mail@81
|
1011 // // count = 0;
|
mail@81
|
1012 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
1013 // // Feature f4 = *it;
|
mail@81
|
1014 // // Feature f8; // musicity
|
mail@81
|
1015 // // Feature f9; // musicity segmenter
|
mail@81
|
1016 // //
|
mail@81
|
1017 // // f8.hasTimestamp = true;
|
mail@81
|
1018 // // f8.timestamp = f4.timestamp;
|
mail@81
|
1019 // // f9.hasTimestamp = true;
|
mail@81
|
1020 // // f9.timestamp = f4.timestamp;
|
mail@81
|
1021 // //
|
mail@81
|
1022 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
1023 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
1024 // // int musicityCount = 0;
|
mail@81
|
1025 // // for (int i = startIndex; i <= endIndex; i++) {
|
mail@81
|
1026 // // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
mail@81
|
1027 // // }
|
mail@81
|
1028 // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
mail@81
|
1029 // //
|
mail@81
|
1030 // // if (isSpeech) {
|
mail@81
|
1031 // // if (oldlabeltype != 2) {
|
mail@81
|
1032 // // f9.label = "Speech";
|
mail@81
|
1033 // // fsOut[9].push_back(f9);
|
mail@81
|
1034 // // oldlabeltype = 2;
|
mail@81
|
1035 // // }
|
mail@81
|
1036 // // } else {
|
mail@81
|
1037 // // if (oldlabeltype != 1) {
|
mail@81
|
1038 // // f9.label = "Music";
|
mail@81
|
1039 // // fsOut[9].push_back(f9);
|
mail@81
|
1040 // // oldlabeltype = 1;
|
mail@81
|
1041 // // }
|
mail@81
|
1042 // // }
|
mail@81
|
1043 // // f8.values.push_back(musicityValue[count]);
|
mail@81
|
1044 // // fsOut[8].push_back(f8);
|
mail@81
|
1045 // // count++;
|
mail@81
|
1046 // // }
|
Chris@23
|
1047 return fsOut;
|
matthiasm@0
|
1048
|
matthiasm@0
|
1049 }
|
matthiasm@0
|
1050
|
Chris@35
|
1051 #endif
|