Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@122
|
// File-local switch for the "--> <method>" trace messages that the
// methods in this file write to cerr; off by default.
static bool debug_on(false);
|
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
mail@89
|
33 m_frameCount(0),
|
Chris@35
|
34 m_logSpectrum(0),
|
Chris@23
|
35 m_blockSize(0),
|
Chris@23
|
36 m_stepSize(0),
|
Chris@23
|
37 m_lengthOfNoteIndex(0),
|
mail@80
|
38 m_meanTunings(0),
|
mail@80
|
39 m_localTunings(0),
|
mail@41
|
40 m_whitening(1.0),
|
Chris@23
|
41 m_preset(0.0),
|
matthiasm@92
|
42 m_useNNLS(1.0),
|
matthiasm@92
|
43 m_localTuning(0.0),
|
Chris@23
|
44 m_kernelValue(0),
|
Chris@23
|
45 m_kernelFftIndex(0),
|
Chris@23
|
46 m_kernelNoteIndex(0),
|
Chris@23
|
47 m_dict(0),
|
matthiasm@92
|
48 m_tuneLocal(0.0),
|
Chris@23
|
49 m_doNormalizeChroma(0),
|
matthiasm@92
|
50 m_rollon(0.0),
|
matthiasm@95
|
51 m_boostN(0.1),
|
matthiasm@42
|
52 m_s(0.7),
|
mail@115
|
53 m_harte_syntax(0),
|
mail@80
|
54 sinvalues(0),
|
mail@80
|
55 cosvalues(0)
|
matthiasm@0
|
56 {
|
Chris@35
|
57 if (debug_on) cerr << "--> NNLSBase" << endl;
|
Chris@23
|
58 // make the *note* dictionary matrix
|
Chris@23
|
59 m_dict = new float[nNote * 84];
|
matthiasm@122
|
60 for (int i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@0
|
61 }
|
matthiasm@0
|
62
|
matthiasm@0
|
63
|
Chris@35
|
64 NNLSBase::~NNLSBase()
|
matthiasm@0
|
65 {
|
Chris@35
|
66 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
67 delete [] m_dict;
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70 string
|
Chris@35
|
71 NNLSBase::getMaker() const
|
matthiasm@0
|
72 {
|
Chris@23
|
73 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
74 // Your name here
|
matthiasm@0
|
75 return "Matthias Mauch";
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 int
|
Chris@35
|
79 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
80 {
|
Chris@23
|
81 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
82 // Increment this each time you release a version that behaves
|
matthiasm@0
|
83 // differently from the previous one
|
Chris@170
|
84 return 5;
|
matthiasm@0
|
85 }
|
matthiasm@0
|
86
|
matthiasm@0
|
87 string
|
Chris@35
|
88 NNLSBase::getCopyright() const
|
matthiasm@0
|
89 {
|
Chris@23
|
90 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
91 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
92 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
93 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
94 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
95 return "GPL";
|
matthiasm@0
|
96 }
|
matthiasm@0
|
97
|
Chris@35
|
98 NNLSBase::InputDomain
|
Chris@35
|
99 NNLSBase::getInputDomain() const
|
matthiasm@0
|
100 {
|
Chris@23
|
101 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
102 return FrequencyDomain;
|
matthiasm@0
|
103 }
|
matthiasm@0
|
104
|
matthiasm@0
|
105 size_t
|
Chris@35
|
106 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
107 {
|
Chris@23
|
108 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
109 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 size_t
|
Chris@35
|
113 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
114 {
|
Chris@23
|
115 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
116 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
117 // means the same as the block size for TimeDomain
|
Chris@23
|
118 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
119 }
|
matthiasm@0
|
120
|
matthiasm@0
|
121 size_t
|
Chris@35
|
122 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
123 {
|
Chris@23
|
124 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
125 return 1;
|
matthiasm@0
|
126 }
|
matthiasm@0
|
127
|
matthiasm@0
|
128 size_t
|
Chris@35
|
129 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
130 {
|
Chris@23
|
131 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
132 return 1;
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134
|
Chris@35
|
135 NNLSBase::ParameterList
|
Chris@35
|
136 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
137 {
|
Chris@23
|
138 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
139 ParameterList list;
|
matthiasm@0
|
140
|
matthiasm@42
|
141 ParameterDescriptor d;
|
matthiasm@42
|
142 d.identifier = "useNNLS";
|
matthiasm@42
|
143 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
144 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
145 d.unit = "";
|
matthiasm@42
|
146 d.minValue = 0.0;
|
matthiasm@42
|
147 d.maxValue = 1.0;
|
matthiasm@42
|
148 d.defaultValue = 1.0;
|
matthiasm@42
|
149 d.isQuantized = true;
|
matthiasm@42
|
150 d.quantizeStep = 1.0;
|
matthiasm@42
|
151 list.push_back(d);
|
matthiasm@42
|
152
|
mail@41
|
153 ParameterDescriptor d0;
|
mail@41
|
154 d0.identifier = "rollon";
|
mail@115
|
155 d0.name = "bass noise threshold";
|
mail@115
|
156 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
|
matthiasm@59
|
157 d0.unit = "%";
|
mail@41
|
158 d0.minValue = 0;
|
matthiasm@59
|
159 d0.maxValue = 5;
|
mail@41
|
160 d0.defaultValue = 0;
|
matthiasm@48
|
161 d0.isQuantized = true;
|
matthiasm@59
|
162 d0.quantizeStep = 0.5;
|
mail@41
|
163 list.push_back(d0);
|
matthiasm@4
|
164
|
matthiasm@4
|
165 ParameterDescriptor d1;
|
matthiasm@4
|
166 d1.identifier = "tuningmode";
|
matthiasm@4
|
167 d1.name = "tuning mode";
|
matthiasm@4
|
168 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
169 d1.unit = "";
|
matthiasm@4
|
170 d1.minValue = 0;
|
matthiasm@4
|
171 d1.maxValue = 1;
|
matthiasm@4
|
172 d1.defaultValue = 0;
|
matthiasm@4
|
173 d1.isQuantized = true;
|
matthiasm@4
|
174 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
175 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
176 d1.quantizeStep = 1.0;
|
matthiasm@4
|
177 list.push_back(d1);
|
matthiasm@4
|
178
|
mail@41
|
179 ParameterDescriptor d2;
|
mail@41
|
180 d2.identifier = "whitening";
|
mail@41
|
181 d2.name = "spectral whitening";
|
mail@41
|
182 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
183 d2.unit = "";
|
mail@41
|
184 d2.isQuantized = true;
|
mail@41
|
185 d2.minValue = 0.0;
|
mail@41
|
186 d2.maxValue = 1.0;
|
mail@41
|
187 d2.defaultValue = 1.0;
|
mail@41
|
188 d2.isQuantized = false;
|
mail@41
|
189 list.push_back(d2);
|
mail@41
|
190
|
mail@41
|
191 ParameterDescriptor d3;
|
mail@41
|
192 d3.identifier = "s";
|
mail@41
|
193 d3.name = "spectral shape";
|
mail@41
|
194 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
195 d3.unit = "";
|
mail@41
|
196 d3.minValue = 0.5;
|
mail@41
|
197 d3.maxValue = 0.9;
|
mail@41
|
198 d3.defaultValue = 0.7;
|
mail@41
|
199 d3.isQuantized = false;
|
mail@41
|
200 list.push_back(d3);
|
mail@41
|
201
|
Chris@23
|
202 ParameterDescriptor d4;
|
matthiasm@12
|
203 d4.identifier = "chromanormalize";
|
matthiasm@12
|
204 d4.name = "chroma normalization";
|
matthiasm@12
|
205 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
206 d4.unit = "";
|
matthiasm@12
|
207 d4.minValue = 0;
|
matthiasm@13
|
208 d4.maxValue = 3;
|
matthiasm@12
|
209 d4.defaultValue = 0;
|
matthiasm@12
|
210 d4.isQuantized = true;
|
matthiasm@13
|
211 d4.valueNames.push_back("none");
|
matthiasm@13
|
212 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
213 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
214 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
215 d4.quantizeStep = 1.0;
|
matthiasm@12
|
216 list.push_back(d4);
|
matthiasm@4
|
217
|
matthiasm@0
|
218 return list;
|
matthiasm@0
|
219 }
|
matthiasm@0
|
220
|
matthiasm@0
|
221 float
|
Chris@35
|
222 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
223 {
|
Chris@23
|
224 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
225 if (identifier == "useNNLS") {
|
matthiasm@42
|
226 return m_useNNLS;
|
matthiasm@0
|
227 }
|
matthiasm@0
|
228
|
mail@41
|
229 if (identifier == "whitening") {
|
mail@41
|
230 return m_whitening;
|
mail@41
|
231 }
|
mail@41
|
232
|
mail@41
|
233 if (identifier == "s") {
|
mail@41
|
234 return m_s;
|
matthiasm@0
|
235 }
|
matthiasm@17
|
236
|
Chris@23
|
237 if (identifier == "rollon") {
|
matthiasm@17
|
238 return m_rollon;
|
matthiasm@17
|
239 }
|
matthiasm@0
|
240
|
mail@89
|
241 if (identifier == "boostn") {
|
mail@89
|
242 return m_boostN;
|
mail@89
|
243 }
|
mail@89
|
244
|
matthiasm@0
|
245 if (identifier == "tuningmode") {
|
matthiasm@0
|
246 if (m_tuneLocal) {
|
matthiasm@0
|
247 return 1.0;
|
matthiasm@0
|
248 } else {
|
matthiasm@0
|
249 return 0.0;
|
matthiasm@0
|
250 }
|
matthiasm@0
|
251 }
|
Chris@23
|
252 if (identifier == "preset") {
|
Chris@23
|
253 return m_preset;
|
matthiasm@3
|
254 }
|
Chris@23
|
255 if (identifier == "chromanormalize") {
|
Chris@23
|
256 return m_doNormalizeChroma;
|
matthiasm@12
|
257 }
|
matthiasm@50
|
258
|
mail@112
|
259 if (identifier == "usehartesyntax") {
|
mail@115
|
260 return m_harte_syntax;
|
mail@112
|
261 }
|
mail@112
|
262
|
matthiasm@0
|
263 return 0;
|
matthiasm@0
|
264
|
matthiasm@0
|
265 }
|
matthiasm@0
|
266
|
matthiasm@0
|
267 void
|
Chris@35
|
268 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
269 {
|
Chris@164
|
270 // cerr << "setParameter (" << identifier << ") -> " << value << endl;
|
Chris@164
|
271
|
Chris@23
|
272 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
273 if (identifier == "useNNLS") {
|
matthiasm@42
|
274 m_useNNLS = (int) value;
|
matthiasm@0
|
275 }
|
matthiasm@0
|
276
|
mail@41
|
277 if (identifier == "whitening") {
|
mail@41
|
278 m_whitening = value;
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
mail@41
|
281 if (identifier == "s") {
|
mail@41
|
282 m_s = value;
|
mail@41
|
283 }
|
mail@41
|
284
|
mail@89
|
285 if (identifier == "boostn") {
|
mail@89
|
286 m_boostN = value;
|
mail@89
|
287 }
|
mail@89
|
288
|
matthiasm@0
|
289 if (identifier == "tuningmode") {
|
mail@60
|
290 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
291 m_tuneLocal = value;
|
matthiasm@0
|
292 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
293 }
|
matthiasm@42
|
294 // if (identifier == "preset") {
|
matthiasm@42
|
295 // m_preset = value;
|
matthiasm@42
|
296 // if (m_preset == 0.0) {
|
matthiasm@42
|
297 // m_tuneLocal = false;
|
matthiasm@42
|
298 // m_whitening = 1.0;
|
matthiasm@42
|
299 // m_dictID = 0.0;
|
matthiasm@42
|
300 // }
|
matthiasm@42
|
301 // if (m_preset == 1.0) {
|
matthiasm@42
|
302 // m_tuneLocal = false;
|
matthiasm@42
|
303 // m_whitening = 1.0;
|
matthiasm@42
|
304 // m_dictID = 1.0;
|
matthiasm@42
|
305 // }
|
matthiasm@42
|
306 // if (m_preset == 2.0) {
|
matthiasm@42
|
307 // m_tuneLocal = false;
|
matthiasm@42
|
308 // m_whitening = 0.7;
|
matthiasm@42
|
309 // m_dictID = 0.0;
|
matthiasm@42
|
310 // }
|
matthiasm@42
|
311 // }
|
Chris@23
|
312 if (identifier == "chromanormalize") {
|
Chris@23
|
313 m_doNormalizeChroma = value;
|
Chris@23
|
314 }
|
matthiasm@17
|
315
|
Chris@23
|
316 if (identifier == "rollon") {
|
Chris@23
|
317 m_rollon = value;
|
Chris@23
|
318 }
|
mail@112
|
319
|
mail@112
|
320 if (identifier == "usehartesyntax") {
|
mail@115
|
321 m_harte_syntax = value;
|
mail@112
|
322 }
|
matthiasm@0
|
323 }
|
matthiasm@0
|
324
|
Chris@35
|
325 NNLSBase::ProgramList
|
Chris@35
|
326 NNLSBase::getPrograms() const
|
matthiasm@0
|
327 {
|
Chris@23
|
328 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
329 ProgramList list;
|
matthiasm@0
|
330
|
matthiasm@0
|
331 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
332 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
333
|
matthiasm@0
|
334 return list;
|
matthiasm@0
|
335 }
|
matthiasm@0
|
336
|
matthiasm@0
|
337 string
|
Chris@35
|
338 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
339 {
|
Chris@23
|
340 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
341 return ""; // no programs
|
matthiasm@0
|
342 }
|
matthiasm@0
|
343
|
matthiasm@0
|
344 void
|
Chris@35
|
345 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
346 {
|
Chris@23
|
347 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
348 }
|
matthiasm@0
|
349
|
matthiasm@0
|
350
|
matthiasm@0
|
351 bool
|
Chris@35
|
352 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
353 {
|
Chris@23
|
354 if (debug_on) {
|
Chris@23
|
355 cerr << "--> initialise";
|
Chris@23
|
356 }
|
matthiasm@1
|
357
|
mail@100
|
358 dictionaryMatrix(m_dict, m_s);
|
mail@100
|
359
|
mail@80
|
360 // make things for tuning estimation
|
mail@80
|
361 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
362 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
363 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
364 }
|
mail@80
|
365
|
mail@80
|
366
|
mail@80
|
367 // make hamming window of length 1/2 octave
|
mail@76
|
368 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
369 float hamwinsum = 0;
|
mail@76
|
370 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
371 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
372 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
373 }
|
mail@77
|
374 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
375
|
mail@80
|
376
|
mail@80
|
377 // initialise the tuning
|
mail@80
|
378 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
379 m_meanTunings.push_back(0);
|
mail@80
|
380 m_localTunings.push_back(0);
|
mail@80
|
381 }
|
mail@76
|
382
|
matthiasm@0
|
383 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
384 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
385 m_blockSize = blockSize;
|
matthiasm@0
|
386 m_stepSize = stepSize;
|
Chris@35
|
387 m_frameCount = 0;
|
mail@77
|
388 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
389 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
390 float *tempkernel;
|
matthiasm@1
|
391
|
Chris@23
|
392 tempkernel = new float[tempn];
|
matthiasm@1
|
393
|
Chris@23
|
394 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
395 m_kernelValue.clear();
|
Chris@23
|
396 m_kernelFftIndex.clear();
|
Chris@23
|
397 m_kernelNoteIndex.clear();
|
Chris@23
|
398 int countNonzero = 0;
|
Chris@91
|
399 for (int iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@122
|
400 for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) {
|
Chris@23
|
401 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
402 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
403 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
404 countNonzero++;
|
Chris@23
|
405 }
|
Chris@23
|
406 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
407 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
408 }
|
Chris@23
|
409 }
|
Chris@23
|
410 }
|
Chris@23
|
411 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
412 delete [] tempkernel;
|
Chris@35
|
413 /*
|
Chris@23
|
414 ofstream myfile;
|
Chris@23
|
415 myfile.open ("matrix.txt");
|
matthiasm@3
|
416 // myfile << "Writing this to a file.\n";
|
Chris@23
|
417 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
418 myfile << m_dict[i] << endl;
|
Chris@23
|
419 }
|
matthiasm@3
|
420 myfile.close();
|
Chris@35
|
421 */
|
matthiasm@0
|
422 return true;
|
matthiasm@0
|
423 }
|
matthiasm@0
|
424
|
matthiasm@0
|
425 void
|
Chris@35
|
426 NNLSBase::reset()
|
matthiasm@0
|
427 {
|
Chris@23
|
428 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
429
|
matthiasm@0
|
430 // Clear buffers, reset stored values, etc
|
Chris@35
|
431 m_frameCount = 0;
|
matthiasm@42
|
432 // m_dictID = 0;
|
Chris@35
|
433 m_logSpectrum.clear();
|
mail@80
|
434 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
435 m_meanTunings[iBPS] = 0;
|
mail@80
|
436 m_localTunings[iBPS] = 0;
|
mail@80
|
437 }
|
Chris@23
|
438 m_localTuning.clear();
|
matthiasm@0
|
439 }
|
matthiasm@0
|
440
|
Chris@35
|
441 void
|
Chris@35
|
442 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
443 {
|
Chris@35
|
444 m_frameCount++;
|
Chris@23
|
445 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
446
|
Chris@23
|
447 const float *fbuf = inputBuffers[0];
|
Chris@23
|
448 float energysum = 0;
|
Chris@23
|
449 // make magnitude
|
Chris@23
|
450 float maxmag = -10000;
|
matthiasm@122
|
451 for (int iBin = 0; iBin < static_cast<int>(m_blockSize/2); iBin++) {
|
Chris@23
|
452 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@93
|
453 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@95
|
454 if (magnitude[iBin]>m_blockSize*1.0) magnitude[iBin] = m_blockSize; // a valid audio signal (between -1 and 1) should not be limited here.
|
Chris@23
|
455 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
456 if (m_rollon > 0) {
|
Chris@23
|
457 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
458 }
|
Chris@23
|
459 }
|
matthiasm@14
|
460
|
Chris@23
|
461 float cumenergy = 0;
|
Chris@23
|
462 if (m_rollon > 0) {
|
matthiasm@122
|
463 for (int iBin = 2; iBin < static_cast<int>(m_blockSize/2); iBin++) {
|
Chris@23
|
464 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
465 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
466 else break;
|
Chris@23
|
467 }
|
Chris@23
|
468 }
|
matthiasm@17
|
469
|
matthiasm@147
|
470 if (maxmag < m_blockSize * 2.0 / 16384.0) { // this is not quite right, I think
|
Chris@23
|
471 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
matthiasm@122
|
472 for (int iBin = 0; iBin < static_cast<int>(m_blockSize/2); iBin++) {
|
Chris@23
|
473 magnitude[iBin] = 0;
|
Chris@23
|
474 }
|
Chris@23
|
475 }
|
matthiasm@4
|
476
|
Chris@23
|
477 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
478 float *nm = new float[nNote]; // note magnitude
|
Chris@91
|
479 for (int iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
480 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
481 }
|
Chris@23
|
482 int binCount = 0;
|
Chris@23
|
483 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
484 // cerr << ".";
|
Chris@23
|
485 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
486 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
487 binCount++;
|
Chris@23
|
488 }
|
Chris@23
|
489 // cerr << nm[20];
|
Chris@23
|
490 // cerr << endl;
|
matthiasm@0
|
491
|
matthiasm@0
|
492
|
Chris@35
|
493 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
494 // update means of complex tuning variables
|
mail@80
|
495 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
496
|
mail@80
|
497 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
498 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
499 float ratioOld = 0.997;
|
mail@80
|
500 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
501 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
502 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
503 }
|
matthiasm@0
|
504 }
|
matthiasm@0
|
505 // if (m_tuneLocal) {
|
Chris@23
|
506 // local tuning
|
mail@80
|
507 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
508 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
509
|
mail@80
|
510 float localTuningImag = 0;
|
mail@80
|
511 float localTuningReal = 0;
|
mail@80
|
512 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
513 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
514 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
515 }
|
mail@80
|
516
|
Chris@23
|
517 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
518 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
519
|
Chris@23
|
520 Feature f1; // logfreqspec
|
Chris@23
|
521 f1.hasTimestamp = true;
|
matthiasm@0
|
522 f1.timestamp = timestamp;
|
Chris@91
|
523 for (int iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
524 f1.values.push_back(nm[iNote]);
|
Chris@23
|
525 }
|
matthiasm@0
|
526
|
matthiasm@0
|
527 // deletes
|
matthiasm@0
|
528 delete[] magnitude;
|
matthiasm@0
|
529 delete[] nm;
|
matthiasm@0
|
530
|
Chris@35
|
531 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
532 }
|
matthiasm@0
|
533
|