Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Compile-time switch for the "--> functionName" trace lines written to cerr
// by the methods in this file. Left off in normal builds.
static const bool debug_on = false;
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
mail@89
|
33 m_frameCount(0),
|
Chris@35
|
34 m_logSpectrum(0),
|
Chris@23
|
35 m_blockSize(0),
|
Chris@23
|
36 m_stepSize(0),
|
Chris@23
|
37 m_lengthOfNoteIndex(0),
|
mail@80
|
38 m_meanTunings(0),
|
mail@80
|
39 m_localTunings(0),
|
mail@41
|
40 m_whitening(1.0),
|
Chris@23
|
41 m_preset(0.0),
|
mail@89
|
42 m_useNNLS(1),
|
mail@89
|
43 m_useHMM(1),
|
Chris@23
|
44 m_localTuning(0),
|
Chris@23
|
45 m_kernelValue(0),
|
Chris@23
|
46 m_kernelFftIndex(0),
|
Chris@23
|
47 m_kernelNoteIndex(0),
|
Chris@23
|
48 m_dict(0),
|
mail@60
|
49 m_tuneLocal(0),
|
Chris@23
|
50 m_doNormalizeChroma(0),
|
mail@60
|
51 m_rollon(0),
|
mail@89
|
52 m_boostN(1.1),
|
matthiasm@42
|
53 m_s(0.7),
|
mail@80
|
54 sinvalues(0),
|
mail@80
|
55 cosvalues(0)
|
matthiasm@0
|
56 {
|
Chris@35
|
57 if (debug_on) cerr << "--> NNLSBase" << endl;
|
matthiasm@7
|
58
|
Chris@23
|
59 // make the *note* dictionary matrix
|
Chris@23
|
60 m_dict = new float[nNote * 84];
|
Chris@23
|
61 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
62 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@0
|
63 }
|
matthiasm@0
|
64
|
matthiasm@0
|
65
|
Chris@35
|
66 NNLSBase::~NNLSBase()
|
matthiasm@0
|
67 {
|
Chris@35
|
68 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
69 delete [] m_dict;
|
matthiasm@0
|
70 }
|
matthiasm@0
|
71
|
matthiasm@0
|
72 string
|
Chris@35
|
73 NNLSBase::getMaker() const
|
matthiasm@0
|
74 {
|
Chris@23
|
75 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
76 // Your name here
|
matthiasm@0
|
77 return "Matthias Mauch";
|
matthiasm@0
|
78 }
|
matthiasm@0
|
79
|
matthiasm@0
|
80 int
|
Chris@35
|
81 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
82 {
|
Chris@23
|
83 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
84 // Increment this each time you release a version that behaves
|
matthiasm@0
|
85 // differently from the previous one
|
matthiasm@0
|
86 return 1;
|
matthiasm@0
|
87 }
|
matthiasm@0
|
88
|
matthiasm@0
|
89 string
|
Chris@35
|
90 NNLSBase::getCopyright() const
|
matthiasm@0
|
91 {
|
Chris@23
|
92 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
93 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
94 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
95 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
96 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
97 return "GPL";
|
matthiasm@0
|
98 }
|
matthiasm@0
|
99
|
Chris@35
|
100 NNLSBase::InputDomain
|
Chris@35
|
101 NNLSBase::getInputDomain() const
|
matthiasm@0
|
102 {
|
Chris@23
|
103 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
104 return FrequencyDomain;
|
matthiasm@0
|
105 }
|
matthiasm@0
|
106
|
matthiasm@0
|
107 size_t
|
Chris@35
|
108 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
109 {
|
Chris@23
|
110 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
111 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
112 }
|
matthiasm@0
|
113
|
matthiasm@0
|
114 size_t
|
Chris@35
|
115 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
116 {
|
Chris@23
|
117 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
118 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
119 // means the same as the block size for TimeDomain
|
Chris@23
|
120 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
121 }
|
matthiasm@0
|
122
|
matthiasm@0
|
123 size_t
|
Chris@35
|
124 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
125 {
|
Chris@23
|
126 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
127 return 1;
|
matthiasm@0
|
128 }
|
matthiasm@0
|
129
|
matthiasm@0
|
130 size_t
|
Chris@35
|
131 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
132 {
|
Chris@23
|
133 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
134 return 1;
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
Chris@35
|
137 NNLSBase::ParameterList
|
Chris@35
|
138 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
139 {
|
Chris@23
|
140 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
141 ParameterList list;
|
matthiasm@0
|
142
|
matthiasm@42
|
143 ParameterDescriptor d;
|
matthiasm@42
|
144 d.identifier = "useNNLS";
|
matthiasm@42
|
145 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
146 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
147 d.unit = "";
|
matthiasm@42
|
148 d.minValue = 0.0;
|
matthiasm@42
|
149 d.maxValue = 1.0;
|
matthiasm@42
|
150 d.defaultValue = 1.0;
|
matthiasm@42
|
151 d.isQuantized = true;
|
matthiasm@42
|
152 d.quantizeStep = 1.0;
|
matthiasm@42
|
153 list.push_back(d);
|
matthiasm@42
|
154
|
mail@41
|
155 ParameterDescriptor d0;
|
mail@41
|
156 d0.identifier = "rollon";
|
mail@41
|
157 d0.name = "spectral roll-on";
|
matthiasm@58
|
158 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
159 d0.unit = "%";
|
mail@41
|
160 d0.minValue = 0;
|
matthiasm@59
|
161 d0.maxValue = 5;
|
mail@41
|
162 d0.defaultValue = 0;
|
matthiasm@48
|
163 d0.isQuantized = true;
|
matthiasm@59
|
164 d0.quantizeStep = 0.5;
|
mail@41
|
165 list.push_back(d0);
|
matthiasm@4
|
166
|
matthiasm@4
|
167 ParameterDescriptor d1;
|
matthiasm@4
|
168 d1.identifier = "tuningmode";
|
matthiasm@4
|
169 d1.name = "tuning mode";
|
matthiasm@4
|
170 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
171 d1.unit = "";
|
matthiasm@4
|
172 d1.minValue = 0;
|
matthiasm@4
|
173 d1.maxValue = 1;
|
matthiasm@4
|
174 d1.defaultValue = 0;
|
matthiasm@4
|
175 d1.isQuantized = true;
|
matthiasm@4
|
176 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
177 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
178 d1.quantizeStep = 1.0;
|
matthiasm@4
|
179 list.push_back(d1);
|
matthiasm@4
|
180
|
mail@41
|
181 ParameterDescriptor d2;
|
mail@41
|
182 d2.identifier = "whitening";
|
mail@41
|
183 d2.name = "spectral whitening";
|
mail@41
|
184 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
185 d2.unit = "";
|
mail@41
|
186 d2.isQuantized = true;
|
mail@41
|
187 d2.minValue = 0.0;
|
mail@41
|
188 d2.maxValue = 1.0;
|
mail@41
|
189 d2.defaultValue = 1.0;
|
mail@41
|
190 d2.isQuantized = false;
|
mail@41
|
191 list.push_back(d2);
|
mail@41
|
192
|
mail@41
|
193 ParameterDescriptor d3;
|
mail@41
|
194 d3.identifier = "s";
|
mail@41
|
195 d3.name = "spectral shape";
|
mail@41
|
196 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
197 d3.unit = "";
|
mail@41
|
198 d3.minValue = 0.5;
|
mail@41
|
199 d3.maxValue = 0.9;
|
mail@41
|
200 d3.defaultValue = 0.7;
|
mail@41
|
201 d3.isQuantized = false;
|
mail@41
|
202 list.push_back(d3);
|
mail@41
|
203
|
Chris@23
|
204 ParameterDescriptor d4;
|
matthiasm@12
|
205 d4.identifier = "chromanormalize";
|
matthiasm@12
|
206 d4.name = "chroma normalization";
|
matthiasm@12
|
207 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
208 d4.unit = "";
|
matthiasm@12
|
209 d4.minValue = 0;
|
matthiasm@13
|
210 d4.maxValue = 3;
|
matthiasm@12
|
211 d4.defaultValue = 0;
|
matthiasm@12
|
212 d4.isQuantized = true;
|
matthiasm@13
|
213 d4.valueNames.push_back("none");
|
matthiasm@13
|
214 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
215 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
216 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
217 d4.quantizeStep = 1.0;
|
matthiasm@12
|
218 list.push_back(d4);
|
matthiasm@4
|
219
|
matthiasm@0
|
220 return list;
|
matthiasm@0
|
221 }
|
matthiasm@0
|
222
|
matthiasm@0
|
223 float
|
Chris@35
|
224 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
225 {
|
Chris@23
|
226 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
227 if (identifier == "useNNLS") {
|
matthiasm@42
|
228 return m_useNNLS;
|
matthiasm@0
|
229 }
|
matthiasm@0
|
230
|
mail@41
|
231 if (identifier == "whitening") {
|
mail@41
|
232 return m_whitening;
|
mail@41
|
233 }
|
mail@41
|
234
|
mail@41
|
235 if (identifier == "s") {
|
mail@41
|
236 return m_s;
|
matthiasm@0
|
237 }
|
matthiasm@17
|
238
|
Chris@23
|
239 if (identifier == "rollon") {
|
matthiasm@17
|
240 return m_rollon;
|
matthiasm@17
|
241 }
|
matthiasm@0
|
242
|
mail@89
|
243 if (identifier == "boostn") {
|
mail@89
|
244 return m_boostN;
|
mail@89
|
245 }
|
mail@89
|
246
|
matthiasm@0
|
247 if (identifier == "tuningmode") {
|
matthiasm@0
|
248 if (m_tuneLocal) {
|
matthiasm@0
|
249 return 1.0;
|
matthiasm@0
|
250 } else {
|
matthiasm@0
|
251 return 0.0;
|
matthiasm@0
|
252 }
|
matthiasm@0
|
253 }
|
Chris@23
|
254 if (identifier == "preset") {
|
Chris@23
|
255 return m_preset;
|
matthiasm@3
|
256 }
|
Chris@23
|
257 if (identifier == "chromanormalize") {
|
Chris@23
|
258 return m_doNormalizeChroma;
|
matthiasm@12
|
259 }
|
matthiasm@50
|
260
|
matthiasm@50
|
261 if (identifier == "useHMM") {
|
matthiasm@50
|
262 return m_useHMM;
|
matthiasm@50
|
263 }
|
matthiasm@50
|
264
|
matthiasm@0
|
265 return 0;
|
matthiasm@0
|
266
|
matthiasm@0
|
267 }
|
matthiasm@0
|
268
|
matthiasm@0
|
269 void
|
Chris@35
|
270 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
271 {
|
Chris@23
|
272 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
273 if (identifier == "useNNLS") {
|
matthiasm@42
|
274 m_useNNLS = (int) value;
|
matthiasm@0
|
275 }
|
matthiasm@0
|
276
|
mail@41
|
277 if (identifier == "whitening") {
|
mail@41
|
278 m_whitening = value;
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
mail@41
|
281 if (identifier == "s") {
|
mail@41
|
282 m_s = value;
|
mail@41
|
283 }
|
mail@41
|
284
|
matthiasm@50
|
285 if (identifier == "useHMM") {
|
matthiasm@50
|
286 m_useHMM = value;
|
matthiasm@50
|
287 }
|
matthiasm@50
|
288
|
mail@89
|
289 if (identifier == "boostn") {
|
mail@89
|
290 m_boostN = value;
|
mail@89
|
291 }
|
mail@89
|
292
|
matthiasm@0
|
293 if (identifier == "tuningmode") {
|
mail@60
|
294 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
295 m_tuneLocal = value;
|
matthiasm@0
|
296 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
297 }
|
matthiasm@42
|
298 // if (identifier == "preset") {
|
matthiasm@42
|
299 // m_preset = value;
|
matthiasm@42
|
300 // if (m_preset == 0.0) {
|
matthiasm@42
|
301 // m_tuneLocal = false;
|
matthiasm@42
|
302 // m_whitening = 1.0;
|
matthiasm@42
|
303 // m_dictID = 0.0;
|
matthiasm@42
|
304 // }
|
matthiasm@42
|
305 // if (m_preset == 1.0) {
|
matthiasm@42
|
306 // m_tuneLocal = false;
|
matthiasm@42
|
307 // m_whitening = 1.0;
|
matthiasm@42
|
308 // m_dictID = 1.0;
|
matthiasm@42
|
309 // }
|
matthiasm@42
|
310 // if (m_preset == 2.0) {
|
matthiasm@42
|
311 // m_tuneLocal = false;
|
matthiasm@42
|
312 // m_whitening = 0.7;
|
matthiasm@42
|
313 // m_dictID = 0.0;
|
matthiasm@42
|
314 // }
|
matthiasm@42
|
315 // }
|
Chris@23
|
316 if (identifier == "chromanormalize") {
|
Chris@23
|
317 m_doNormalizeChroma = value;
|
Chris@23
|
318 }
|
matthiasm@17
|
319
|
Chris@23
|
320 if (identifier == "rollon") {
|
Chris@23
|
321 m_rollon = value;
|
Chris@23
|
322 }
|
matthiasm@0
|
323 }
|
matthiasm@0
|
324
|
Chris@35
|
325 NNLSBase::ProgramList
|
Chris@35
|
326 NNLSBase::getPrograms() const
|
matthiasm@0
|
327 {
|
Chris@23
|
328 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
329 ProgramList list;
|
matthiasm@0
|
330
|
matthiasm@0
|
331 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
332 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
333
|
matthiasm@0
|
334 return list;
|
matthiasm@0
|
335 }
|
matthiasm@0
|
336
|
matthiasm@0
|
337 string
|
Chris@35
|
338 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
339 {
|
Chris@23
|
340 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
341 return ""; // no programs
|
matthiasm@0
|
342 }
|
matthiasm@0
|
343
|
matthiasm@0
|
344 void
|
Chris@35
|
345 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
346 {
|
Chris@23
|
347 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
348 }
|
matthiasm@0
|
349
|
matthiasm@0
|
350
|
matthiasm@0
|
351 bool
|
Chris@35
|
352 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
353 {
|
Chris@23
|
354 if (debug_on) {
|
Chris@23
|
355 cerr << "--> initialise";
|
Chris@23
|
356 }
|
matthiasm@1
|
357
|
mail@80
|
358 // make things for tuning estimation
|
mail@80
|
359 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
360 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
361 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
362 }
|
mail@80
|
363
|
mail@80
|
364
|
mail@80
|
365 // make hamming window of length 1/2 octave
|
mail@76
|
366 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
367 float hamwinsum = 0;
|
mail@76
|
368 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
369 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
370 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
371 }
|
mail@77
|
372 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
373
|
mail@80
|
374
|
mail@80
|
375 // initialise the tuning
|
mail@80
|
376 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
377 m_meanTunings.push_back(0);
|
mail@80
|
378 m_localTunings.push_back(0);
|
mail@80
|
379 }
|
mail@76
|
380
|
matthiasm@0
|
381 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
382 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
383 m_blockSize = blockSize;
|
matthiasm@0
|
384 m_stepSize = stepSize;
|
Chris@35
|
385 m_frameCount = 0;
|
mail@77
|
386 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
387 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
388 float *tempkernel;
|
matthiasm@1
|
389
|
Chris@23
|
390 tempkernel = new float[tempn];
|
matthiasm@1
|
391
|
Chris@23
|
392 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
393 m_kernelValue.clear();
|
Chris@23
|
394 m_kernelFftIndex.clear();
|
Chris@23
|
395 m_kernelNoteIndex.clear();
|
Chris@23
|
396 int countNonzero = 0;
|
Chris@23
|
397 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
398 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
399 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
400 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
401 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
402 countNonzero++;
|
Chris@23
|
403 }
|
Chris@23
|
404 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
405 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
406 }
|
Chris@23
|
407 }
|
Chris@23
|
408 }
|
Chris@23
|
409 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
410 delete [] tempkernel;
|
Chris@35
|
411 /*
|
Chris@23
|
412 ofstream myfile;
|
Chris@23
|
413 myfile.open ("matrix.txt");
|
matthiasm@3
|
414 // myfile << "Writing this to a file.\n";
|
Chris@23
|
415 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
416 myfile << m_dict[i] << endl;
|
Chris@23
|
417 }
|
matthiasm@3
|
418 myfile.close();
|
Chris@35
|
419 */
|
matthiasm@0
|
420 return true;
|
matthiasm@0
|
421 }
|
matthiasm@0
|
422
|
matthiasm@0
|
423 void
|
Chris@35
|
424 NNLSBase::reset()
|
matthiasm@0
|
425 {
|
Chris@23
|
426 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
427
|
matthiasm@0
|
428 // Clear buffers, reset stored values, etc
|
Chris@35
|
429 m_frameCount = 0;
|
matthiasm@42
|
430 // m_dictID = 0;
|
Chris@35
|
431 m_logSpectrum.clear();
|
mail@80
|
432 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
433 m_meanTunings[iBPS] = 0;
|
mail@80
|
434 m_localTunings[iBPS] = 0;
|
mail@80
|
435 }
|
Chris@23
|
436 m_localTuning.clear();
|
matthiasm@0
|
437 }
|
matthiasm@0
|
438
|
Chris@35
|
439 void
|
Chris@35
|
440 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
441 {
|
Chris@35
|
442 m_frameCount++;
|
Chris@23
|
443 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
444
|
Chris@23
|
445 const float *fbuf = inputBuffers[0];
|
Chris@23
|
446 float energysum = 0;
|
Chris@23
|
447 // make magnitude
|
Chris@23
|
448 float maxmag = -10000;
|
Chris@23
|
449 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
450 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
451 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
452 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
453 if (m_rollon > 0) {
|
Chris@23
|
454 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
455 }
|
Chris@23
|
456 }
|
matthiasm@14
|
457
|
Chris@23
|
458 float cumenergy = 0;
|
Chris@23
|
459 if (m_rollon > 0) {
|
Chris@23
|
460 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
461 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
462 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
463 else break;
|
Chris@23
|
464 }
|
Chris@23
|
465 }
|
matthiasm@17
|
466
|
Chris@23
|
467 if (maxmag < 2) {
|
Chris@23
|
468 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
469 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
470 magnitude[iBin] = 0;
|
Chris@23
|
471 }
|
Chris@23
|
472 }
|
matthiasm@4
|
473
|
Chris@23
|
474 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
475 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
476 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
477 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
478 }
|
Chris@23
|
479 int binCount = 0;
|
Chris@23
|
480 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
481 // cerr << ".";
|
Chris@23
|
482 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
483 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
484 binCount++;
|
Chris@23
|
485 }
|
Chris@23
|
486 // cerr << nm[20];
|
Chris@23
|
487 // cerr << endl;
|
matthiasm@0
|
488
|
matthiasm@0
|
489
|
Chris@35
|
490 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
491 // update means of complex tuning variables
|
mail@80
|
492 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
493
|
mail@80
|
494 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
495 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
496 float ratioOld = 0.997;
|
mail@80
|
497 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
498 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
499 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
500 }
|
matthiasm@0
|
501 }
|
matthiasm@0
|
502 // if (m_tuneLocal) {
|
Chris@23
|
503 // local tuning
|
mail@80
|
504 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
505 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
506
|
mail@80
|
507 float localTuningImag = 0;
|
mail@80
|
508 float localTuningReal = 0;
|
mail@80
|
509 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
510 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
511 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
512 }
|
mail@80
|
513
|
Chris@23
|
514 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
515 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
516
|
Chris@23
|
517 Feature f1; // logfreqspec
|
Chris@23
|
518 f1.hasTimestamp = true;
|
matthiasm@0
|
519 f1.timestamp = timestamp;
|
Chris@23
|
520 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
521 f1.values.push_back(nm[iNote]);
|
Chris@23
|
522 }
|
matthiasm@0
|
523
|
matthiasm@0
|
524 // deletes
|
matthiasm@0
|
525 delete[] magnitude;
|
matthiasm@0
|
526 delete[] nm;
|
matthiasm@0
|
527
|
Chris@35
|
528 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
529 }
|
matthiasm@0
|
530
|
Chris@35
|
531
|
Chris@35
|
532 #ifdef NOT_DEFINED
|
Chris@35
|
533
|
Chris@35
|
534 NNLSBase::FeatureSet
|
Chris@35
|
535 NNLSBase::getRemainingFeatures()
|
matthiasm@0
|
536 {
|
mail@81
|
537 // if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
mail@81
|
538 FeatureSet fsOut;
|
mail@81
|
539 // if (m_logSpectrum.size() == 0) return fsOut;
|
mail@81
|
540 // int nChord = m_chordnames.size();
|
mail@81
|
541 // //
|
mail@81
|
542 // /** Calculate Tuning
|
mail@81
|
543 // calculate tuning from (using the angle of the complex number defined by the
|
mail@81
|
544 // cumulative mean real and imag values)
|
mail@81
|
545 // **/
|
mail@81
|
546 // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
|
mail@81
|
547 // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
|
mail@81
|
548 // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
mail@81
|
549 // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
mail@81
|
550 // int intShift = floor(normalisedtuning * 3);
|
mail@81
|
551 // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
552 //
|
mail@81
|
553 // char buffer0 [50];
|
mail@81
|
554 //
|
mail@81
|
555 // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
mail@81
|
556 //
|
mail@81
|
557 // // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
mail@81
|
558 //
|
mail@81
|
559 // // push tuning to FeatureSet fsOut
|
mail@81
|
560 // Feature f0; // tuning
|
mail@81
|
561 // f0.hasTimestamp = true;
|
mail@81
|
562 // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
mail@81
|
563 // f0.label = buffer0;
|
mail@81
|
564 // fsOut[0].push_back(f0);
|
mail@81
|
565 //
|
mail@81
|
566 // /** Tune Log-Frequency Spectrogram
|
mail@81
|
567 // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
mail@81
|
568 // perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
mail@81
|
569 // **/
|
mail@81
|
570 // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
mail@81
|
571 //
|
mail@81
|
572 // float tempValue = 0;
|
mail@81
|
573 // float dbThreshold = 0; // relative to the background spectrum
|
mail@81
|
574 // float thresh = pow(10,dbThreshold/20);
|
mail@81
|
575 // // cerr << "tune local ? " << m_tuneLocal << endl;
|
mail@81
|
576 // int count = 0;
|
mail@81
|
577 //
|
mail@81
|
578 // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
mail@81
|
579 // Feature f1 = *i;
|
mail@81
|
580 // Feature f2; // tuned log-frequency spectrum
|
mail@81
|
581 // f2.hasTimestamp = true;
|
mail@81
|
582 // f2.timestamp = f1.timestamp;
|
mail@81
|
583 // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
mail@81
|
584 //
|
mail@81
|
585 // if (m_tuneLocal == 1.0) {
|
mail@81
|
586 // intShift = floor(m_localTuning[count] * 3);
|
mail@81
|
587 // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
mail@81
|
588 // }
|
mail@81
|
589 //
|
mail@81
|
590 // // cerr << intShift << " " << floatShift << endl;
|
mail@81
|
591 //
|
mail@81
|
592 // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@81
|
593 // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
mail@81
|
594 // f2.values.push_back(tempValue);
|
mail@81
|
595 // }
|
mail@81
|
596 //
|
mail@81
|
597 // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
mail@81
|
598 // vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
mail@81
|
599 // vector<float> runningstd;
|
mail@81
|
600 // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
mail@81
|
601 // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
mail@81
|
602 // }
|
mail@81
|
603 // runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@81
|
604 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
605 // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
mail@81
|
606 // if (runningstd[i] > 0) {
|
mail@81
|
607 // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@81
|
608 // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
609 // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@81
|
610 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
mail@81
|
611 // }
|
mail@81
|
612 // if (f2.values[i] < 0) {
|
mail@81
|
613 // cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
mail@81
|
614 // }
|
mail@81
|
615 // }
|
mail@81
|
616 // fsOut[2].push_back(f2);
|
mail@81
|
617 // count++;
|
mail@81
|
618 // }
|
mail@81
|
619 // cerr << "done." << endl;
|
mail@81
|
620 //
|
mail@81
|
621 // /** Semitone spectrum and chromagrams
|
mail@81
|
622 // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
mail@81
|
623 // is inferred using a non-negative least squares algorithm.
|
mail@81
|
624 // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
mail@81
|
625 // bass and treble stacked onto each other).
|
mail@81
|
626 // **/
|
mail@81
|
627 // if (m_useNNLS == 0) {
|
mail@81
|
628 // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
mail@81
|
629 // } else {
|
mail@81
|
630 // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
mail@81
|
631 // }
|
Chris@23
|
632 //
|
mail@81
|
633 //
|
mail@81
|
634 // vector<vector<float> > chordogram;
|
mail@81
|
635 // vector<vector<int> > scoreChordogram;
|
mail@81
|
636 // vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
mail@81
|
637 // vector<float> oldchroma = vector<float>(12,0);
|
mail@81
|
638 // vector<float> oldbasschroma = vector<float>(12,0);
|
mail@81
|
639 // count = 0;
|
mail@81
|
640 //
|
mail@81
|
641 // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
mail@81
|
642 // Feature f2 = *it; // logfreq spectrum
|
mail@81
|
643 // Feature f3; // semitone spectrum
|
mail@81
|
644 // Feature f4; // treble chromagram
|
mail@81
|
645 // Feature f5; // bass chromagram
|
mail@81
|
646 // Feature f6; // treble and bass chromagram
|
mail@81
|
647 //
|
mail@81
|
648 // f3.hasTimestamp = true;
|
mail@81
|
649 // f3.timestamp = f2.timestamp;
|
mail@81
|
650 //
|
mail@81
|
651 // f4.hasTimestamp = true;
|
mail@81
|
652 // f4.timestamp = f2.timestamp;
|
mail@81
|
653 //
|
mail@81
|
654 // f5.hasTimestamp = true;
|
mail@81
|
655 // f5.timestamp = f2.timestamp;
|
mail@81
|
656 //
|
mail@81
|
657 // f6.hasTimestamp = true;
|
mail@81
|
658 // f6.timestamp = f2.timestamp;
|
mail@81
|
659 //
|
mail@81
|
660 // float b[nNote];
|
mail@81
|
661 //
|
mail@81
|
662 // bool some_b_greater_zero = false;
|
mail@81
|
663 // float sumb = 0;
|
mail@81
|
664 // for (int i = 0; i < nNote; i++) {
|
mail@81
|
665 // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
mail@81
|
666 // b[i] = f2.values[i];
|
mail@81
|
667 // sumb += b[i];
|
mail@81
|
668 // if (b[i] > 0) {
|
mail@81
|
669 // some_b_greater_zero = true;
|
mail@81
|
670 // }
|
mail@81
|
671 // }
|
mail@81
|
672 //
|
mail@81
|
673 // // here's where the non-negative least squares algorithm calculates the note activation x
|
mail@81
|
674 //
|
mail@81
|
675 // vector<float> chroma = vector<float>(12, 0);
|
mail@81
|
676 // vector<float> basschroma = vector<float>(12, 0);
|
mail@81
|
677 // float currval;
|
mail@81
|
678 // unsigned iSemitone = 0;
|
mail@81
|
679 //
|
mail@81
|
680 // if (some_b_greater_zero) {
|
mail@81
|
681 // if (m_useNNLS == 0) {
|
mail@81
|
682 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
683 // currval = 0;
|
mail@81
|
684 // currval += b[iNote + 1 + -1] * 0.5;
|
mail@81
|
685 // currval += b[iNote + 1 + 0] * 1.0;
|
mail@81
|
686 // currval += b[iNote + 1 + 1] * 0.5;
|
mail@81
|
687 // f3.values.push_back(currval);
|
mail@81
|
688 // chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
mail@81
|
689 // basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
mail@81
|
690 // iSemitone++;
|
mail@81
|
691 // }
|
mail@81
|
692 //
|
mail@81
|
693 // } else {
|
mail@81
|
694 // float x[84+1000];
|
mail@81
|
695 // for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
mail@81
|
696 // vector<int> signifIndex;
|
mail@81
|
697 // int index=0;
|
mail@81
|
698 // sumb /= 84.0;
|
mail@81
|
699 // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
mail@81
|
700 // float currval = 0;
|
mail@81
|
701 // currval += b[iNote + 1 + -1];
|
mail@81
|
702 // currval += b[iNote + 1 + 0];
|
mail@81
|
703 // currval += b[iNote + 1 + 1];
|
mail@81
|
704 // if (currval > 0) signifIndex.push_back(index);
|
mail@81
|
705 // f3.values.push_back(0); // fill the values, change later
|
mail@81
|
706 // index++;
|
mail@81
|
707 // }
|
mail@81
|
708 // float rnorm;
|
mail@81
|
709 // float w[84+1000];
|
mail@81
|
710 // float zz[84+1000];
|
mail@81
|
711 // int indx[84+1000];
|
mail@81
|
712 // int mode;
|
mail@81
|
713 // int dictsize = nNote*signifIndex.size();
|
mail@81
|
714 // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
mail@81
|
715 // float *curr_dict = new float[dictsize];
|
mail@81
|
716 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
717 // for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@81
|
718 // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
mail@81
|
719 // }
|
mail@81
|
720 // }
|
mail@81
|
721 // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
mail@81
|
722 // delete [] curr_dict;
|
mail@81
|
723 // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@81
|
724 // f3.values[signifIndex[iNote]] = x[iNote];
|
mail@81
|
725 // // cerr << mode << endl;
|
mail@81
|
726 // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
mail@81
|
727 // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
mail@81
|
728 // }
|
mail@81
|
729 // }
|
mail@81
|
730 // }
|
mail@81
|
731 //
|
mail@81
|
732 //
|
mail@81
|
733 //
|
mail@81
|
734 //
|
mail@81
|
735 // f4.values = chroma;
|
mail@81
|
736 // f5.values = basschroma;
|
mail@81
|
737 // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack both chromas
|
mail@81
|
738 // f6.values = chroma;
|
mail@81
|
739 //
|
mail@81
|
740 // if (m_doNormalizeChroma > 0) {
|
mail@81
|
741 // vector<float> chromanorm = vector<float>(3,0);
|
mail@81
|
742 // switch (int(m_doNormalizeChroma)) {
|
mail@81
|
743 // case 0: // should never end up here
|
mail@81
|
744 // break;
|
mail@81
|
745 // case 1:
|
mail@81
|
746 // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
mail@81
|
747 // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
mail@81
|
748 // chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
mail@81
|
749 // break;
|
mail@81
|
750 // case 2:
|
mail@81
|
751 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
752 // chromanorm[0] += *it;
|
mail@81
|
753 // }
|
mail@81
|
754 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
755 // chromanorm[1] += *it;
|
mail@81
|
756 // }
|
mail@81
|
757 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
758 // chromanorm[2] += *it;
|
mail@81
|
759 // }
|
mail@81
|
760 // break;
|
mail@81
|
761 // case 3:
|
mail@81
|
762 // for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
mail@81
|
763 // chromanorm[0] += pow(*it,2);
|
mail@81
|
764 // }
|
mail@81
|
765 // chromanorm[0] = sqrt(chromanorm[0]);
|
mail@81
|
766 // for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
mail@81
|
767 // chromanorm[1] += pow(*it,2);
|
mail@81
|
768 // }
|
mail@81
|
769 // chromanorm[1] = sqrt(chromanorm[1]);
|
mail@81
|
770 // for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
mail@81
|
771 // chromanorm[2] += pow(*it,2);
|
mail@81
|
772 // }
|
mail@81
|
773 // chromanorm[2] = sqrt(chromanorm[2]);
|
mail@81
|
774 // break;
|
mail@81
|
775 // }
|
mail@81
|
776 // if (chromanorm[0] > 0) {
|
mail@81
|
777 // for (int i = 0; i < f4.values.size(); i++) {
|
mail@81
|
778 // f4.values[i] /= chromanorm[0];
|
mail@81
|
779 // }
|
mail@81
|
780 // }
|
mail@81
|
781 // if (chromanorm[1] > 0) {
|
mail@81
|
782 // for (int i = 0; i < f5.values.size(); i++) {
|
mail@81
|
783 // f5.values[i] /= chromanorm[1];
|
mail@81
|
784 // }
|
mail@81
|
785 // }
|
mail@81
|
786 // if (chromanorm[2] > 0) {
|
mail@81
|
787 // for (int i = 0; i < f6.values.size(); i++) {
|
mail@81
|
788 // f6.values[i] /= chromanorm[2];
|
mail@81
|
789 // }
|
mail@81
|
790 // }
|
mail@81
|
791 //
|
mail@81
|
792 // }
|
mail@81
|
793 //
|
mail@81
|
794 // // local chord estimation
|
mail@81
|
795 // vector<float> currentChordSalience;
|
mail@81
|
796 // float tempchordvalue = 0;
|
mail@81
|
797 // float sumchordvalue = 0;
|
mail@81
|
798 //
|
mail@81
|
799 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
800 // tempchordvalue = 0;
|
mail@81
|
801 // for (int iBin = 0; iBin < 12; iBin++) {
|
mail@81
|
802 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
803 // }
|
mail@81
|
804 // for (int iBin = 12; iBin < 24; iBin++) {
|
mail@81
|
805 // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
mail@81
|
806 // }
|
mail@81
|
807 // sumchordvalue+=tempchordvalue;
|
mail@81
|
808 // currentChordSalience.push_back(tempchordvalue);
|
mail@81
|
809 // }
|
mail@81
|
810 // if (sumchordvalue > 0) {
|
mail@81
|
811 // for (int iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
812 // currentChordSalience[iChord] /= sumchordvalue;
|
mail@81
|
813 // }
|
mail@81
|
814 // } else {
|
mail@81
|
815 // currentChordSalience[nChord-1] = 1.0;
|
mail@81
|
816 // }
|
mail@81
|
817 // chordogram.push_back(currentChordSalience);
|
mail@81
|
818 //
|
mail@81
|
819 // fsOut[3].push_back(f3);
|
mail@81
|
820 // fsOut[4].push_back(f4);
|
mail@81
|
821 // fsOut[5].push_back(f5);
|
mail@81
|
822 // fsOut[6].push_back(f6);
|
mail@81
|
823 // count++;
|
mail@81
|
824 // }
|
mail@81
|
825 // cerr << "done." << endl;
|
mail@81
|
826 //
|
mail@81
|
827 //
|
mail@81
|
828 // /* Simple chord estimation
|
mail@81
|
829 // I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
mail@81
|
830 // take the maximum. Very simple, don't do this at home...
|
mail@81
|
831 // */
|
mail@81
|
832 // cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
mail@81
|
833 // count = 0;
|
mail@81
|
834 // int halfwindowlength = m_inputSampleRate / m_stepSize;
|
mail@81
|
835 // vector<int> chordSequence;
|
mail@81
|
836 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
mail@81
|
837 // vector<int> temp = vector<int>(nChord,0);
|
mail@81
|
838 // scoreChordogram.push_back(temp);
|
mail@81
|
839 // }
|
mail@81
|
840 // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
mail@81
|
841 // int startIndex = count + 1;
|
mail@81
|
842 // int endIndex = count + 2 * halfwindowlength;
|
mail@81
|
843 //
|
mail@81
|
844 // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
mail@81
|
845 //
|
mail@81
|
846 // vector<int> chordCandidates;
|
mail@81
|
847 // for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
mail@81
|
848 // // float currsum = 0;
|
mail@81
|
849 // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
850 // // currsum += chordogram[iFrame][iChord];
|
mail@81
|
851 // // }
|
mail@81
|
852 // // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
mail@81
|
853 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
mail@81
|
854 // if (chordogram[iFrame][iChord] > chordThreshold) {
|
mail@81
|
855 // chordCandidates.push_back(iChord);
|
mail@81
|
856 // break;
|
mail@81
|
857 // }
|
mail@81
|
858 // }
|
mail@81
|
859 // }
|
mail@81
|
860 // chordCandidates.push_back(nChord-1);
|
mail@81
|
861 // // cerr << chordCandidates.size() << endl;
|
mail@81
|
862 //
|
mail@81
|
863 // float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
mail@81
|
864 // float maxindex = 0; //... and the index thereof
|
mail@81
|
865 // unsigned bestchordL = nChord-1; // index of the best "left" chord
|
mail@81
|
866 // unsigned bestchordR = nChord-1; // index of the best "right" chord
|
mail@81
|
867 //
|
mail@81
|
868 // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
mail@81
|
869 // // now find the max values on both sides of iWF
|
mail@81
|
870 // // left side:
|
mail@81
|
871 // float maxL = 0;
|
mail@81
|
872 // unsigned maxindL = nChord-1;
|
mail@81
|
873 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
874 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
875 // float currsum = 0;
|
mail@81
|
876 // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
mail@81
|
877 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
878 // }
|
mail@81
|
879 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
880 // if (currsum > maxL) {
|
mail@81
|
881 // maxL = currsum;
|
mail@81
|
882 // maxindL = iChord;
|
mail@81
|
883 // }
|
mail@81
|
884 // }
|
mail@81
|
885 // // right side:
|
mail@81
|
886 // float maxR = 0;
|
mail@81
|
887 // unsigned maxindR = nChord-1;
|
mail@81
|
888 // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
mail@81
|
889 // unsigned iChord = chordCandidates[kChord];
|
mail@81
|
890 // float currsum = 0;
|
mail@81
|
891 // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
892 // currsum += chordogram[count+iFrame][iChord];
|
mail@81
|
893 // }
|
mail@81
|
894 // if (iChord == nChord-1) currsum *= 0.8;
|
mail@81
|
895 // if (currsum > maxR) {
|
mail@81
|
896 // maxR = currsum;
|
mail@81
|
897 // maxindR = iChord;
|
mail@81
|
898 // }
|
mail@81
|
899 // }
|
mail@81
|
900 // if (maxL+maxR > maxval) {
|
mail@81
|
901 // maxval = maxL+maxR;
|
mail@81
|
902 // maxindex = iWF;
|
mail@81
|
903 // bestchordL = maxindL;
|
mail@81
|
904 // bestchordR = maxindR;
|
mail@81
|
905 // }
|
mail@81
|
906 //
|
mail@81
|
907 // }
|
mail@81
|
908 // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
mail@81
|
909 // // add a score to every chord-frame-point that was part of a maximum
|
mail@81
|
910 // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
mail@81
|
911 // scoreChordogram[iFrame+count][bestchordL]++;
|
mail@81
|
912 // }
|
mail@81
|
913 // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
mail@81
|
914 // scoreChordogram[iFrame+count][bestchordR]++;
|
mail@81
|
915 // }
|
mail@81
|
916 // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
mail@81
|
917 // count++;
|
mail@81
|
918 // }
|
mail@81
|
919 // // cerr << "******* agent finished *******" << endl;
|
mail@81
|
920 // count = 0;
|
mail@81
|
921 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
922 // float maxval = 0; // will be the value of the most salient chord in this frame
|
mail@81
|
923 // float maxindex = 0; //... and the index thereof
|
mail@81
|
924 // for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
mail@81
|
925 // if (scoreChordogram[count][iChord] > maxval) {
|
mail@81
|
926 // maxval = scoreChordogram[count][iChord];
|
mail@81
|
927 // maxindex = iChord;
|
mail@81
|
928 // // cerr << iChord << endl;
|
mail@81
|
929 // }
|
mail@81
|
930 // }
|
mail@81
|
931 // chordSequence.push_back(maxindex);
|
mail@81
|
932 // // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
mail@81
|
933 // count++;
|
mail@81
|
934 // }
|
mail@81
|
935 // // cerr << "******* mode filter done *******" << endl;
|
mail@81
|
936 //
|
mail@81
|
937 //
|
mail@81
|
938 // // mode filter on chordSequence
|
mail@81
|
939 // count = 0;
|
mail@81
|
940 // string oldChord = "";
|
mail@81
|
941 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
mail@81
|
942 // Feature f6 = *it;
|
mail@81
|
943 // Feature f7; // chord estimate
|
mail@81
|
944 // f7.hasTimestamp = true;
|
mail@81
|
945 // f7.timestamp = f6.timestamp;
|
mail@81
|
946 // Feature f8; // chord change value (filled from chordchange[count] below)
|
mail@81
|
947 // f8.hasTimestamp = true;
|
mail@81
|
948 // f8.timestamp = f6.timestamp;
|
mail@81
|
949 //
|
mail@81
|
950 // vector<int> chordCount = vector<int>(nChord,0);
|
mail@81
|
951 // int maxChordCount = 0;
|
mail@81
|
952 // int maxChordIndex = nChord-1;
|
mail@81
|
953 // string maxChord;
|
mail@81
|
954 // int startIndex = max(count - halfwindowlength/2,0);
|
mail@81
|
955 // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
mail@81
|
956 // for (int i = startIndex; i < endIndex; i++) {
|
mail@81
|
957 // chordCount[chordSequence[i]]++;
|
mail@81
|
958 // if (chordCount[chordSequence[i]] > maxChordCount) {
|
mail@81
|
959 // // cerr << "start index " << startIndex << endl;
|
mail@81
|
960 // maxChordCount++;
|
mail@81
|
961 // maxChordIndex = chordSequence[i];
|
mail@81
|
962 // maxChord = m_chordnames[maxChordIndex];
|
mail@81
|
963 // }
|
mail@81
|
964 // }
|
mail@81
|
965 // // chordSequence[count] = maxChordIndex;
|
mail@81
|
966 // // cerr << maxChordIndex << endl;
|
mail@81
|
967 // f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
mail@81
|
968 // // cerr << chordchange[count] << endl;
|
mail@81
|
969 // fsOut[9].push_back(f8);
|
mail@81
|
970 // if (oldChord != maxChord) {
|
mail@81
|
971 // oldChord = maxChord;
|
mail@81
|
972 //
|
mail@81
|
973 // // char buffer1 [50];
|
mail@81
|
974 // // if (maxChordIndex < nChord - 1) {
|
mail@81
|
975 // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
mail@81
|
976 // // } else {
|
mail@81
|
977 // // sprintf(buffer1, "N");
|
mail@81
|
978 // // }
|
mail@81
|
979 // // f7.label = buffer1;
|
mail@81
|
980 // f7.label = m_chordnames[maxChordIndex];
|
mail@81
|
981 // fsOut[7].push_back(f7);
|
mail@81
|
982 // }
|
mail@81
|
983 // count++;
|
mail@81
|
984 // }
|
mail@81
|
985 // Feature f7; // last chord estimate
|
mail@81
|
986 // f7.hasTimestamp = true;
|
mail@81
|
987 // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
mail@81
|
988 // f7.label = "N";
|
mail@81
|
989 // fsOut[7].push_back(f7);
|
mail@81
|
990 // cerr << "done." << endl;
|
mail@81
|
991 // // // musicity
|
mail@81
|
992 // // count = 0;
|
mail@81
|
993 // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
mail@81
|
994 // // vector<float> musicityValue;
|
mail@81
|
995 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
996 // // Feature f4 = *it;
|
mail@81
|
997 // //
|
mail@81
|
998 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
999 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
1000 // // float chromasum = 0;
|
mail@81
|
1001 // // float diffsum = 0;
|
mail@81
|
1002 // // for (int k = 0; k < 12; k++) {
|
mail@81
|
1003 // // for (int i = startIndex + 1; i < endIndex; i++) {
|
mail@81
|
1004 // // chromasum += pow(fsOut[4][i].values[k],2);
|
mail@81
|
1005 // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
mail@81
|
1006 // // }
|
mail@81
|
1007 // // }
|
mail@81
|
1008 // // diffsum /= chromasum;
|
mail@81
|
1009 // // musicityValue.push_back(diffsum);
|
mail@81
|
1010 // // count++;
|
mail@81
|
1011 // // }
|
mail@81
|
1012 // //
|
mail@81
|
1013 // // float musicityThreshold = 0.44;
|
mail@81
|
1014 // // if (m_stepSize == 4096) {
|
mail@81
|
1015 // // musicityThreshold = 0.74;
|
mail@81
|
1016 // // }
|
mail@81
|
1017 // // if (m_stepSize == 4410) {
|
mail@81
|
1018 // // musicityThreshold = 0.77;
|
mail@81
|
1019 // // }
|
mail@81
|
1020 // //
|
mail@81
|
1021 // // count = 0;
|
mail@81
|
1022 // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
mail@81
|
1023 // // Feature f4 = *it;
|
mail@81
|
1024 // // Feature f8; // musicity
|
mail@81
|
1025 // // Feature f9; // musicity segmenter
|
mail@81
|
1026 // //
|
mail@81
|
1027 // // f8.hasTimestamp = true;
|
mail@81
|
1028 // // f8.timestamp = f4.timestamp;
|
mail@81
|
1029 // // f9.hasTimestamp = true;
|
mail@81
|
1030 // // f9.timestamp = f4.timestamp;
|
mail@81
|
1031 // //
|
mail@81
|
1032 // // int startIndex = max(count - musicitykernelwidth/2,0);
|
mail@81
|
1033 // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
mail@81
|
1034 // // int musicityCount = 0;
|
mail@81
|
1035 // // for (int i = startIndex; i <= endIndex; i++) {
|
mail@81
|
1036 // // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
mail@81
|
1037 // // }
|
mail@81
|
1038 // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
mail@81
|
1039 // //
|
mail@81
|
1040 // // if (isSpeech) {
|
mail@81
|
1041 // // if (oldlabeltype != 2) {
|
mail@81
|
1042 // // f9.label = "Speech";
|
mail@81
|
1043 // // fsOut[9].push_back(f9);
|
mail@81
|
1044 // // oldlabeltype = 2;
|
mail@81
|
1045 // // }
|
mail@81
|
1046 // // } else {
|
mail@81
|
1047 // // if (oldlabeltype != 1) {
|
mail@81
|
1048 // // f9.label = "Music";
|
mail@81
|
1049 // // fsOut[9].push_back(f9);
|
mail@81
|
1050 // // oldlabeltype = 1;
|
mail@81
|
1051 // // }
|
mail@81
|
1052 // // }
|
mail@81
|
1053 // // f8.values.push_back(musicityValue[count]);
|
mail@81
|
1054 // // fsOut[8].push_back(f8);
|
mail@81
|
1055 // // count++;
|
mail@81
|
1056 // // }
|
Chris@23
|
1057 return fsOut;
|
matthiasm@0
|
1058
|
matthiasm@0
|
1059 }
|
matthiasm@0
|
1060
|
Chris@35
|
1061 #endif
|