Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "NNLSBase.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
// Trace switch: when set to true, the entry points in this file announce
// themselves on cerr. Disabled by default.
static const bool debug_on = false;
matthiasm@0
|
30
|
Chris@35
|
31 NNLSBase::NNLSBase(float inputSampleRate) :
|
Chris@23
|
32 Plugin(inputSampleRate),
|
mail@89
|
33 m_frameCount(0),
|
Chris@35
|
34 m_logSpectrum(0),
|
Chris@23
|
35 m_blockSize(0),
|
Chris@23
|
36 m_stepSize(0),
|
Chris@23
|
37 m_lengthOfNoteIndex(0),
|
mail@80
|
38 m_meanTunings(0),
|
mail@80
|
39 m_localTunings(0),
|
mail@41
|
40 m_whitening(1.0),
|
Chris@23
|
41 m_preset(0.0),
|
matthiasm@92
|
42 m_useNNLS(1.0),
|
matthiasm@92
|
43 m_useHMM(1.0),
|
matthiasm@92
|
44 m_localTuning(0.0),
|
Chris@23
|
45 m_kernelValue(0),
|
Chris@23
|
46 m_kernelFftIndex(0),
|
Chris@23
|
47 m_kernelNoteIndex(0),
|
Chris@23
|
48 m_dict(0),
|
matthiasm@92
|
49 m_tuneLocal(0.0),
|
Chris@23
|
50 m_doNormalizeChroma(0),
|
matthiasm@92
|
51 m_rollon(0.0),
|
mail@89
|
52 m_boostN(1.1),
|
matthiasm@42
|
53 m_s(0.7),
|
mail@80
|
54 sinvalues(0),
|
mail@80
|
55 cosvalues(0)
|
matthiasm@0
|
56 {
|
Chris@35
|
57 if (debug_on) cerr << "--> NNLSBase" << endl;
|
Chris@23
|
58 // make the *note* dictionary matrix
|
Chris@23
|
59 m_dict = new float[nNote * 84];
|
Chris@23
|
60 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
mail@41
|
61 dictionaryMatrix(m_dict, 0.7);
|
matthiasm@0
|
62 }
|
matthiasm@0
|
63
|
matthiasm@0
|
64
|
Chris@35
|
65 NNLSBase::~NNLSBase()
|
matthiasm@0
|
66 {
|
Chris@35
|
67 if (debug_on) cerr << "--> ~NNLSBase" << endl;
|
Chris@23
|
68 delete [] m_dict;
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70
|
matthiasm@0
|
71 string
|
Chris@35
|
72 NNLSBase::getMaker() const
|
matthiasm@0
|
73 {
|
Chris@23
|
74 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
75 // Your name here
|
matthiasm@0
|
76 return "Matthias Mauch";
|
matthiasm@0
|
77 }
|
matthiasm@0
|
78
|
matthiasm@0
|
79 int
|
Chris@35
|
80 NNLSBase::getPluginVersion() const
|
matthiasm@0
|
81 {
|
Chris@23
|
82 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
83 // Increment this each time you release a version that behaves
|
matthiasm@0
|
84 // differently from the previous one
|
matthiasm@0
|
85 return 1;
|
matthiasm@0
|
86 }
|
matthiasm@0
|
87
|
matthiasm@0
|
88 string
|
Chris@35
|
89 NNLSBase::getCopyright() const
|
matthiasm@0
|
90 {
|
Chris@23
|
91 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
92 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
93 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
94 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
95 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@35
|
96 return "GPL";
|
matthiasm@0
|
97 }
|
matthiasm@0
|
98
|
Chris@35
|
99 NNLSBase::InputDomain
|
Chris@35
|
100 NNLSBase::getInputDomain() const
|
matthiasm@0
|
101 {
|
Chris@23
|
102 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
103 return FrequencyDomain;
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105
|
matthiasm@0
|
106 size_t
|
Chris@35
|
107 NNLSBase::getPreferredBlockSize() const
|
matthiasm@0
|
108 {
|
Chris@23
|
109 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
110 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 size_t
|
Chris@35
|
114 NNLSBase::getPreferredStepSize() const
|
matthiasm@0
|
115 {
|
Chris@23
|
116 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
117 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
118 // means the same as the block size for TimeDomain
|
Chris@23
|
119 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
120 }
|
matthiasm@0
|
121
|
matthiasm@0
|
122 size_t
|
Chris@35
|
123 NNLSBase::getMinChannelCount() const
|
matthiasm@0
|
124 {
|
Chris@23
|
125 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
126 return 1;
|
matthiasm@0
|
127 }
|
matthiasm@0
|
128
|
matthiasm@0
|
129 size_t
|
Chris@35
|
130 NNLSBase::getMaxChannelCount() const
|
matthiasm@0
|
131 {
|
Chris@23
|
132 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
133 return 1;
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
Chris@35
|
136 NNLSBase::ParameterList
|
Chris@35
|
137 NNLSBase::getParameterDescriptors() const
|
matthiasm@0
|
138 {
|
Chris@23
|
139 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
140 ParameterList list;
|
matthiasm@0
|
141
|
matthiasm@42
|
142 ParameterDescriptor d;
|
matthiasm@42
|
143 d.identifier = "useNNLS";
|
matthiasm@42
|
144 d.name = "use approximate transcription (NNLS)";
|
matthiasm@42
|
145 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@42
|
146 d.unit = "";
|
matthiasm@42
|
147 d.minValue = 0.0;
|
matthiasm@42
|
148 d.maxValue = 1.0;
|
matthiasm@42
|
149 d.defaultValue = 1.0;
|
matthiasm@42
|
150 d.isQuantized = true;
|
matthiasm@42
|
151 d.quantizeStep = 1.0;
|
matthiasm@42
|
152 list.push_back(d);
|
matthiasm@42
|
153
|
mail@41
|
154 ParameterDescriptor d0;
|
mail@41
|
155 d0.identifier = "rollon";
|
mail@41
|
156 d0.name = "spectral roll-on";
|
matthiasm@58
|
157 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
158 d0.unit = "%";
|
mail@41
|
159 d0.minValue = 0;
|
matthiasm@59
|
160 d0.maxValue = 5;
|
mail@41
|
161 d0.defaultValue = 0;
|
matthiasm@48
|
162 d0.isQuantized = true;
|
matthiasm@59
|
163 d0.quantizeStep = 0.5;
|
mail@41
|
164 list.push_back(d0);
|
matthiasm@4
|
165
|
matthiasm@4
|
166 ParameterDescriptor d1;
|
matthiasm@4
|
167 d1.identifier = "tuningmode";
|
matthiasm@4
|
168 d1.name = "tuning mode";
|
matthiasm@4
|
169 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
170 d1.unit = "";
|
matthiasm@4
|
171 d1.minValue = 0;
|
matthiasm@4
|
172 d1.maxValue = 1;
|
matthiasm@4
|
173 d1.defaultValue = 0;
|
matthiasm@4
|
174 d1.isQuantized = true;
|
matthiasm@4
|
175 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
176 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
177 d1.quantizeStep = 1.0;
|
matthiasm@4
|
178 list.push_back(d1);
|
matthiasm@4
|
179
|
mail@41
|
180 ParameterDescriptor d2;
|
mail@41
|
181 d2.identifier = "whitening";
|
mail@41
|
182 d2.name = "spectral whitening";
|
mail@41
|
183 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
mail@41
|
184 d2.unit = "";
|
mail@41
|
185 d2.isQuantized = true;
|
mail@41
|
186 d2.minValue = 0.0;
|
mail@41
|
187 d2.maxValue = 1.0;
|
mail@41
|
188 d2.defaultValue = 1.0;
|
mail@41
|
189 d2.isQuantized = false;
|
mail@41
|
190 list.push_back(d2);
|
mail@41
|
191
|
mail@41
|
192 ParameterDescriptor d3;
|
mail@41
|
193 d3.identifier = "s";
|
mail@41
|
194 d3.name = "spectral shape";
|
mail@41
|
195 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
mail@41
|
196 d3.unit = "";
|
mail@41
|
197 d3.minValue = 0.5;
|
mail@41
|
198 d3.maxValue = 0.9;
|
mail@41
|
199 d3.defaultValue = 0.7;
|
mail@41
|
200 d3.isQuantized = false;
|
mail@41
|
201 list.push_back(d3);
|
mail@41
|
202
|
Chris@23
|
203 ParameterDescriptor d4;
|
matthiasm@12
|
204 d4.identifier = "chromanormalize";
|
matthiasm@12
|
205 d4.name = "chroma normalization";
|
matthiasm@12
|
206 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
207 d4.unit = "";
|
matthiasm@12
|
208 d4.minValue = 0;
|
matthiasm@13
|
209 d4.maxValue = 3;
|
matthiasm@12
|
210 d4.defaultValue = 0;
|
matthiasm@12
|
211 d4.isQuantized = true;
|
matthiasm@13
|
212 d4.valueNames.push_back("none");
|
matthiasm@13
|
213 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
214 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
215 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
216 d4.quantizeStep = 1.0;
|
matthiasm@12
|
217 list.push_back(d4);
|
matthiasm@4
|
218
|
matthiasm@0
|
219 return list;
|
matthiasm@0
|
220 }
|
matthiasm@0
|
221
|
matthiasm@0
|
222 float
|
Chris@35
|
223 NNLSBase::getParameter(string identifier) const
|
matthiasm@0
|
224 {
|
Chris@23
|
225 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@42
|
226 if (identifier == "useNNLS") {
|
matthiasm@42
|
227 return m_useNNLS;
|
matthiasm@0
|
228 }
|
matthiasm@0
|
229
|
mail@41
|
230 if (identifier == "whitening") {
|
mail@41
|
231 return m_whitening;
|
mail@41
|
232 }
|
mail@41
|
233
|
mail@41
|
234 if (identifier == "s") {
|
mail@41
|
235 return m_s;
|
matthiasm@0
|
236 }
|
matthiasm@17
|
237
|
Chris@23
|
238 if (identifier == "rollon") {
|
matthiasm@17
|
239 return m_rollon;
|
matthiasm@17
|
240 }
|
matthiasm@0
|
241
|
mail@89
|
242 if (identifier == "boostn") {
|
mail@89
|
243 return m_boostN;
|
mail@89
|
244 }
|
mail@89
|
245
|
matthiasm@0
|
246 if (identifier == "tuningmode") {
|
matthiasm@0
|
247 if (m_tuneLocal) {
|
matthiasm@0
|
248 return 1.0;
|
matthiasm@0
|
249 } else {
|
matthiasm@0
|
250 return 0.0;
|
matthiasm@0
|
251 }
|
matthiasm@0
|
252 }
|
Chris@23
|
253 if (identifier == "preset") {
|
Chris@23
|
254 return m_preset;
|
matthiasm@3
|
255 }
|
Chris@23
|
256 if (identifier == "chromanormalize") {
|
Chris@23
|
257 return m_doNormalizeChroma;
|
matthiasm@12
|
258 }
|
matthiasm@50
|
259
|
matthiasm@50
|
260 if (identifier == "useHMM") {
|
matthiasm@50
|
261 return m_useHMM;
|
matthiasm@50
|
262 }
|
matthiasm@50
|
263
|
matthiasm@0
|
264 return 0;
|
matthiasm@0
|
265
|
matthiasm@0
|
266 }
|
matthiasm@0
|
267
|
matthiasm@0
|
268 void
|
Chris@35
|
269 NNLSBase::setParameter(string identifier, float value)
|
matthiasm@0
|
270 {
|
Chris@23
|
271 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@42
|
272 if (identifier == "useNNLS") {
|
matthiasm@42
|
273 m_useNNLS = (int) value;
|
matthiasm@0
|
274 }
|
matthiasm@0
|
275
|
mail@41
|
276 if (identifier == "whitening") {
|
mail@41
|
277 m_whitening = value;
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
mail@41
|
280 if (identifier == "s") {
|
mail@41
|
281 m_s = value;
|
mail@41
|
282 }
|
mail@41
|
283
|
matthiasm@50
|
284 if (identifier == "useHMM") {
|
matthiasm@50
|
285 m_useHMM = value;
|
matthiasm@50
|
286 }
|
matthiasm@50
|
287
|
mail@89
|
288 if (identifier == "boostn") {
|
mail@89
|
289 m_boostN = value;
|
mail@89
|
290 }
|
mail@89
|
291
|
matthiasm@0
|
292 if (identifier == "tuningmode") {
|
mail@60
|
293 // m_tuneLocal = (value > 0) ? true : false;
|
mail@60
|
294 m_tuneLocal = value;
|
matthiasm@0
|
295 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
296 }
|
matthiasm@42
|
297 // if (identifier == "preset") {
|
matthiasm@42
|
298 // m_preset = value;
|
matthiasm@42
|
299 // if (m_preset == 0.0) {
|
matthiasm@42
|
300 // m_tuneLocal = false;
|
matthiasm@42
|
301 // m_whitening = 1.0;
|
matthiasm@42
|
302 // m_dictID = 0.0;
|
matthiasm@42
|
303 // }
|
matthiasm@42
|
304 // if (m_preset == 1.0) {
|
matthiasm@42
|
305 // m_tuneLocal = false;
|
matthiasm@42
|
306 // m_whitening = 1.0;
|
matthiasm@42
|
307 // m_dictID = 1.0;
|
matthiasm@42
|
308 // }
|
matthiasm@42
|
309 // if (m_preset == 2.0) {
|
matthiasm@42
|
310 // m_tuneLocal = false;
|
matthiasm@42
|
311 // m_whitening = 0.7;
|
matthiasm@42
|
312 // m_dictID = 0.0;
|
matthiasm@42
|
313 // }
|
matthiasm@42
|
314 // }
|
Chris@23
|
315 if (identifier == "chromanormalize") {
|
Chris@23
|
316 m_doNormalizeChroma = value;
|
Chris@23
|
317 }
|
matthiasm@17
|
318
|
Chris@23
|
319 if (identifier == "rollon") {
|
Chris@23
|
320 m_rollon = value;
|
Chris@23
|
321 }
|
matthiasm@0
|
322 }
|
matthiasm@0
|
323
|
Chris@35
|
324 NNLSBase::ProgramList
|
Chris@35
|
325 NNLSBase::getPrograms() const
|
matthiasm@0
|
326 {
|
Chris@23
|
327 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
328 ProgramList list;
|
matthiasm@0
|
329
|
matthiasm@0
|
330 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
331 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
332
|
matthiasm@0
|
333 return list;
|
matthiasm@0
|
334 }
|
matthiasm@0
|
335
|
matthiasm@0
|
336 string
|
Chris@35
|
337 NNLSBase::getCurrentProgram() const
|
matthiasm@0
|
338 {
|
Chris@23
|
339 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
340 return ""; // no programs
|
matthiasm@0
|
341 }
|
matthiasm@0
|
342
|
matthiasm@0
|
343 void
|
Chris@35
|
344 NNLSBase::selectProgram(string name)
|
matthiasm@0
|
345 {
|
Chris@23
|
346 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
347 }
|
matthiasm@0
|
348
|
matthiasm@0
|
349
|
matthiasm@0
|
350 bool
|
Chris@35
|
351 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
352 {
|
Chris@23
|
353 if (debug_on) {
|
Chris@23
|
354 cerr << "--> initialise";
|
Chris@23
|
355 }
|
matthiasm@1
|
356
|
mail@80
|
357 // make things for tuning estimation
|
mail@80
|
358 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
359 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
360 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
|
mail@80
|
361 }
|
mail@80
|
362
|
mail@80
|
363
|
mail@80
|
364 // make hamming window of length 1/2 octave
|
mail@76
|
365 int hamwinlength = nBPS * 6 + 1;
|
mail@76
|
366 float hamwinsum = 0;
|
mail@76
|
367 for (int i = 0; i < hamwinlength; ++i) {
|
mail@76
|
368 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
|
mail@76
|
369 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
|
mail@76
|
370 }
|
mail@77
|
371 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
|
mail@80
|
372
|
mail@80
|
373
|
mail@80
|
374 // initialise the tuning
|
mail@80
|
375 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
376 m_meanTunings.push_back(0);
|
mail@80
|
377 m_localTunings.push_back(0);
|
mail@80
|
378 }
|
mail@76
|
379
|
matthiasm@0
|
380 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
381 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
382 m_blockSize = blockSize;
|
matthiasm@0
|
383 m_stepSize = stepSize;
|
Chris@35
|
384 m_frameCount = 0;
|
mail@77
|
385 int tempn = nNote * m_blockSize/2;
|
Chris@23
|
386 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
387 float *tempkernel;
|
matthiasm@1
|
388
|
Chris@23
|
389 tempkernel = new float[tempn];
|
matthiasm@1
|
390
|
Chris@23
|
391 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
392 m_kernelValue.clear();
|
Chris@23
|
393 m_kernelFftIndex.clear();
|
Chris@23
|
394 m_kernelNoteIndex.clear();
|
Chris@23
|
395 int countNonzero = 0;
|
Chris@91
|
396 for (int iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@91
|
397 for (int iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
398 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
399 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
400 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
401 countNonzero++;
|
Chris@23
|
402 }
|
Chris@23
|
403 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
404 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
405 }
|
Chris@23
|
406 }
|
Chris@23
|
407 }
|
Chris@23
|
408 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
409 delete [] tempkernel;
|
Chris@35
|
410 /*
|
Chris@23
|
411 ofstream myfile;
|
Chris@23
|
412 myfile.open ("matrix.txt");
|
matthiasm@3
|
413 // myfile << "Writing this to a file.\n";
|
Chris@23
|
414 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
415 myfile << m_dict[i] << endl;
|
Chris@23
|
416 }
|
matthiasm@3
|
417 myfile.close();
|
Chris@35
|
418 */
|
matthiasm@0
|
419 return true;
|
matthiasm@0
|
420 }
|
matthiasm@0
|
421
|
matthiasm@0
|
422 void
|
Chris@35
|
423 NNLSBase::reset()
|
matthiasm@0
|
424 {
|
Chris@23
|
425 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
426
|
matthiasm@0
|
427 // Clear buffers, reset stored values, etc
|
Chris@35
|
428 m_frameCount = 0;
|
matthiasm@42
|
429 // m_dictID = 0;
|
Chris@35
|
430 m_logSpectrum.clear();
|
mail@80
|
431 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
432 m_meanTunings[iBPS] = 0;
|
mail@80
|
433 m_localTunings[iBPS] = 0;
|
mail@80
|
434 }
|
Chris@23
|
435 m_localTuning.clear();
|
matthiasm@0
|
436 }
|
matthiasm@0
|
437
|
Chris@35
|
438 void
|
Chris@35
|
439 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
440 {
|
Chris@35
|
441 m_frameCount++;
|
Chris@23
|
442 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
443
|
Chris@23
|
444 const float *fbuf = inputBuffers[0];
|
Chris@23
|
445 float energysum = 0;
|
Chris@23
|
446 // make magnitude
|
Chris@23
|
447 float maxmag = -10000;
|
Chris@23
|
448 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
449 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
450 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
451 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
452 if (m_rollon > 0) {
|
Chris@23
|
453 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
454 }
|
Chris@23
|
455 }
|
matthiasm@14
|
456
|
Chris@23
|
457 float cumenergy = 0;
|
Chris@23
|
458 if (m_rollon > 0) {
|
Chris@23
|
459 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
460 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@59
|
461 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
|
Chris@23
|
462 else break;
|
Chris@23
|
463 }
|
Chris@23
|
464 }
|
matthiasm@17
|
465
|
Chris@23
|
466 if (maxmag < 2) {
|
Chris@23
|
467 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
468 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
469 magnitude[iBin] = 0;
|
Chris@23
|
470 }
|
Chris@23
|
471 }
|
matthiasm@4
|
472
|
Chris@23
|
473 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
474 float *nm = new float[nNote]; // note magnitude
|
Chris@91
|
475 for (int iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
476 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
477 }
|
Chris@23
|
478 int binCount = 0;
|
Chris@23
|
479 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
480 // cerr << ".";
|
Chris@23
|
481 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
482 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
483 binCount++;
|
Chris@23
|
484 }
|
Chris@23
|
485 // cerr << nm[20];
|
Chris@23
|
486 // cerr << endl;
|
matthiasm@0
|
487
|
matthiasm@0
|
488
|
Chris@35
|
489 float one_over_N = 1.0/m_frameCount;
|
matthiasm@0
|
490 // update means of complex tuning variables
|
mail@80
|
491 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
|
mail@80
|
492
|
mail@80
|
493 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
|
mail@80
|
494 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
|
Chris@23
|
495 float ratioOld = 0.997;
|
mail@80
|
496 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
497 m_localTunings[iBPS] *= ratioOld;
|
mail@80
|
498 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
|
mail@80
|
499 }
|
matthiasm@0
|
500 }
|
matthiasm@0
|
501 // if (m_tuneLocal) {
|
Chris@23
|
502 // local tuning
|
mail@80
|
503 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
|
mail@80
|
504 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
|
mail@80
|
505
|
mail@80
|
506 float localTuningImag = 0;
|
mail@80
|
507 float localTuningReal = 0;
|
mail@80
|
508 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
509 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
510 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
511 }
|
mail@80
|
512
|
Chris@23
|
513 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
514 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
515
|
Chris@23
|
516 Feature f1; // logfreqspec
|
Chris@23
|
517 f1.hasTimestamp = true;
|
matthiasm@0
|
518 f1.timestamp = timestamp;
|
Chris@91
|
519 for (int iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
520 f1.values.push_back(nm[iNote]);
|
Chris@23
|
521 }
|
matthiasm@0
|
522
|
matthiasm@0
|
523 // deletes
|
matthiasm@0
|
524 delete[] magnitude;
|
matthiasm@0
|
525 delete[] nm;
|
matthiasm@0
|
526
|
Chris@35
|
527 m_logSpectrum.push_back(f1); // remember note magnitude
|
matthiasm@0
|
528 }
|
matthiasm@0
|
529
|