Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
matthiasm@0
|
3 #include "NNLSChroma.h"
|
Chris@27
|
4
|
Chris@27
|
5 #include "chromamethods.h"
|
Chris@27
|
6
|
Chris@27
|
7 #include <cstdlib>
|
Chris@27
|
8 #include <fstream>
|
matthiasm@0
|
9 #include <cmath>
|
matthiasm@9
|
10
|
Chris@27
|
11 #include <algorithm>
|
matthiasm@0
|
12
|
matthiasm@0
|
13 const bool debug_on = false;
|
matthiasm@0
|
14
|
Chris@27
|
15 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
16
|
matthiasm@0
|
17 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
Chris@23
|
18 Plugin(inputSampleRate),
|
Chris@23
|
19 m_fl(0),
|
Chris@23
|
20 m_blockSize(0),
|
Chris@23
|
21 m_stepSize(0),
|
Chris@23
|
22 m_lengthOfNoteIndex(0),
|
Chris@23
|
23 m_meanTuning0(0),
|
Chris@23
|
24 m_meanTuning1(0),
|
Chris@23
|
25 m_meanTuning2(0),
|
Chris@23
|
26 m_localTuning0(0),
|
Chris@23
|
27 m_localTuning1(0),
|
Chris@23
|
28 m_localTuning2(0),
|
Chris@23
|
29 m_paling(1.0),
|
Chris@23
|
30 m_preset(0.0),
|
Chris@23
|
31 m_localTuning(0),
|
Chris@23
|
32 m_kernelValue(0),
|
Chris@23
|
33 m_kernelFftIndex(0),
|
Chris@23
|
34 m_kernelNoteIndex(0),
|
Chris@23
|
35 m_dict(0),
|
Chris@23
|
36 m_tuneLocal(false),
|
Chris@23
|
37 m_dictID(0),
|
Chris@23
|
38 m_chorddict(0),
|
Chris@23
|
39 m_chordnames(0),
|
Chris@23
|
40 m_doNormalizeChroma(0),
|
Chris@23
|
41 m_rollon(0.01)
|
matthiasm@0
|
42 {
|
Chris@23
|
43 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
44
|
Chris@23
|
45 // make the *note* dictionary matrix
|
Chris@23
|
46 m_dict = new float[nNote * 84];
|
Chris@23
|
47 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
Chris@23
|
48 dictionaryMatrix(m_dict);
|
matthiasm@7
|
49
|
Chris@23
|
50 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
51 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
52 }
|
matthiasm@0
|
53
|
matthiasm@0
|
54
|
matthiasm@0
|
55 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
56 {
|
Chris@23
|
57 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
Chris@23
|
58 delete [] m_dict;
|
Chris@23
|
59 // delete [] m_chorddict;
|
Chris@23
|
60 // delete m_chordnames;
|
matthiasm@0
|
61 }
|
matthiasm@0
|
62
|
matthiasm@0
|
63 string
|
matthiasm@0
|
64 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
65 {
|
Chris@23
|
66 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
67 return "nnls_chroma";
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70 string
|
matthiasm@0
|
71 NNLSChroma::getName() const
|
matthiasm@0
|
72 {
|
Chris@23
|
73 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
74 return "NNLS Chroma";
|
matthiasm@0
|
75 }
|
matthiasm@0
|
76
|
matthiasm@0
|
77 string
|
matthiasm@0
|
78 NNLSChroma::getDescription() const
|
matthiasm@0
|
79 {
|
matthiasm@0
|
80 // Return something helpful here!
|
Chris@23
|
81 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@13
|
82 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
|
matthiasm@0
|
83 }
|
matthiasm@0
|
84
|
matthiasm@0
|
85 string
|
matthiasm@0
|
86 NNLSChroma::getMaker() const
|
matthiasm@0
|
87 {
|
Chris@23
|
88 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
89 // Your name here
|
matthiasm@0
|
90 return "Matthias Mauch";
|
matthiasm@0
|
91 }
|
matthiasm@0
|
92
|
matthiasm@0
|
93 int
|
matthiasm@0
|
94 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
95 {
|
Chris@23
|
96 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
97 // Increment this each time you release a version that behaves
|
matthiasm@0
|
98 // differently from the previous one
|
matthiasm@0
|
99 return 1;
|
matthiasm@0
|
100 }
|
matthiasm@0
|
101
|
matthiasm@0
|
102 string
|
matthiasm@0
|
103 NNLSChroma::getCopyright() const
|
matthiasm@0
|
104 {
|
Chris@23
|
105 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
106 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
107 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
108 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
109 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
110 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 NNLSChroma::InputDomain
|
matthiasm@0
|
114 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
115 {
|
Chris@23
|
116 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
117 return FrequencyDomain;
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 size_t
|
matthiasm@0
|
121 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
122 {
|
Chris@23
|
123 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
124 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
125 }
|
matthiasm@0
|
126
|
matthiasm@0
|
127 size_t
|
matthiasm@0
|
128 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
129 {
|
Chris@23
|
130 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
131 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
132 // means the same as the block size for TimeDomain
|
Chris@23
|
133 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
matthiasm@0
|
136 size_t
|
matthiasm@0
|
137 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
138 {
|
Chris@23
|
139 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
140 return 1;
|
matthiasm@0
|
141 }
|
matthiasm@0
|
142
|
matthiasm@0
|
143 size_t
|
matthiasm@0
|
144 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
145 {
|
Chris@23
|
146 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
147 return 1;
|
matthiasm@0
|
148 }
|
matthiasm@0
|
149
|
matthiasm@0
|
150 NNLSChroma::ParameterList
|
matthiasm@0
|
151 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
152 {
|
Chris@23
|
153 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
154 ParameterList list;
|
matthiasm@0
|
155
|
matthiasm@3
|
156 ParameterDescriptor d3;
|
matthiasm@3
|
157 d3.identifier = "preset";
|
matthiasm@3
|
158 d3.name = "preset";
|
matthiasm@3
|
159 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
160 d3.unit = "";
|
Chris@23
|
161 d3.isQuantized = true;
|
Chris@23
|
162 d3.quantizeStep = 1;
|
matthiasm@3
|
163 d3.minValue = 0.0;
|
matthiasm@4
|
164 d3.maxValue = 3.0;
|
matthiasm@3
|
165 d3.defaultValue = 0.0;
|
matthiasm@3
|
166 d3.valueNames.push_back("polyphonic pop");
|
Chris@23
|
167 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
168 d3.valueNames.push_back("solo keyboard");
|
Chris@23
|
169 d3.valueNames.push_back("manual");
|
matthiasm@3
|
170 list.push_back(d3);
|
matthiasm@4
|
171
|
matthiasm@17
|
172 ParameterDescriptor d5;
|
Chris@23
|
173 d5.identifier = "rollon";
|
Chris@23
|
174 d5.name = "spectral roll-on";
|
Chris@23
|
175 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
Chris@23
|
176 d5.unit = "";
|
Chris@23
|
177 d5.minValue = 0;
|
Chris@23
|
178 d5.maxValue = 1;
|
Chris@23
|
179 d5.defaultValue = 0;
|
Chris@23
|
180 d5.isQuantized = false;
|
Chris@23
|
181 list.push_back(d5);
|
matthiasm@17
|
182
|
matthiasm@4
|
183 // ParameterDescriptor d0;
|
matthiasm@4
|
184 // d0.identifier = "notedict";
|
matthiasm@4
|
185 // d0.name = "note dictionary";
|
matthiasm@4
|
186 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
187 // d0.unit = "";
|
matthiasm@4
|
188 // d0.minValue = 0;
|
matthiasm@4
|
189 // d0.maxValue = 1;
|
matthiasm@4
|
190 // d0.defaultValue = 0;
|
matthiasm@4
|
191 // d0.isQuantized = true;
|
matthiasm@4
|
192 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
193 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
194 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
195 // list.push_back(d0);
|
matthiasm@4
|
196
|
matthiasm@4
|
197 ParameterDescriptor d1;
|
matthiasm@4
|
198 d1.identifier = "tuningmode";
|
matthiasm@4
|
199 d1.name = "tuning mode";
|
matthiasm@4
|
200 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
201 d1.unit = "";
|
matthiasm@4
|
202 d1.minValue = 0;
|
matthiasm@4
|
203 d1.maxValue = 1;
|
matthiasm@4
|
204 d1.defaultValue = 0;
|
matthiasm@4
|
205 d1.isQuantized = true;
|
matthiasm@4
|
206 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
207 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
208 d1.quantizeStep = 1.0;
|
matthiasm@4
|
209 list.push_back(d1);
|
matthiasm@4
|
210
|
Chris@23
|
211 // ParameterDescriptor d2;
|
Chris@23
|
212 // d2.identifier = "paling";
|
Chris@23
|
213 // d2.name = "spectral paling";
|
Chris@23
|
214 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
Chris@23
|
215 // d2.unit = "";
|
Chris@23
|
216 // d2.isQuantized = true;
|
Chris@23
|
217 // // d2.quantizeStep = 0.1;
|
Chris@23
|
218 // d2.minValue = 0.0;
|
Chris@23
|
219 // d2.maxValue = 1.0;
|
Chris@23
|
220 // d2.defaultValue = 1.0;
|
Chris@23
|
221 // d2.isQuantized = false;
|
Chris@23
|
222 // list.push_back(d2);
|
Chris@23
|
223 ParameterDescriptor d4;
|
matthiasm@12
|
224 d4.identifier = "chromanormalize";
|
matthiasm@12
|
225 d4.name = "chroma normalization";
|
matthiasm@12
|
226 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
227 d4.unit = "";
|
matthiasm@12
|
228 d4.minValue = 0;
|
matthiasm@13
|
229 d4.maxValue = 3;
|
matthiasm@12
|
230 d4.defaultValue = 0;
|
matthiasm@12
|
231 d4.isQuantized = true;
|
matthiasm@13
|
232 d4.valueNames.push_back("none");
|
matthiasm@13
|
233 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
234 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
235 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
236 d4.quantizeStep = 1.0;
|
matthiasm@12
|
237 list.push_back(d4);
|
matthiasm@4
|
238
|
matthiasm@0
|
239 return list;
|
matthiasm@0
|
240 }
|
matthiasm@0
|
241
|
matthiasm@0
|
242 float
|
matthiasm@0
|
243 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
244 {
|
Chris@23
|
245 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
246 if (identifier == "notedict") {
|
matthiasm@0
|
247 return m_dictID;
|
matthiasm@0
|
248 }
|
matthiasm@0
|
249
|
matthiasm@0
|
250 if (identifier == "paling") {
|
matthiasm@0
|
251 return m_paling;
|
matthiasm@0
|
252 }
|
matthiasm@17
|
253
|
Chris@23
|
254 if (identifier == "rollon") {
|
matthiasm@17
|
255 return m_rollon;
|
matthiasm@17
|
256 }
|
matthiasm@0
|
257
|
matthiasm@0
|
258 if (identifier == "tuningmode") {
|
matthiasm@0
|
259 if (m_tuneLocal) {
|
matthiasm@0
|
260 return 1.0;
|
matthiasm@0
|
261 } else {
|
matthiasm@0
|
262 return 0.0;
|
matthiasm@0
|
263 }
|
matthiasm@0
|
264 }
|
Chris@23
|
265 if (identifier == "preset") {
|
Chris@23
|
266 return m_preset;
|
matthiasm@3
|
267 }
|
Chris@23
|
268 if (identifier == "chromanormalize") {
|
Chris@23
|
269 return m_doNormalizeChroma;
|
matthiasm@12
|
270 }
|
matthiasm@0
|
271 return 0;
|
matthiasm@0
|
272
|
matthiasm@0
|
273 }
|
matthiasm@0
|
274
|
matthiasm@0
|
275 void
|
matthiasm@0
|
276 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
277 {
|
Chris@23
|
278 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
279 if (identifier == "notedict") {
|
matthiasm@0
|
280 m_dictID = (int) value;
|
matthiasm@0
|
281 }
|
matthiasm@0
|
282
|
matthiasm@0
|
283 if (identifier == "paling") {
|
matthiasm@0
|
284 m_paling = value;
|
matthiasm@0
|
285 }
|
matthiasm@0
|
286
|
matthiasm@0
|
287 if (identifier == "tuningmode") {
|
matthiasm@0
|
288 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
289 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
290 }
|
matthiasm@3
|
291 if (identifier == "preset") {
|
matthiasm@3
|
292 m_preset = value;
|
Chris@23
|
293 if (m_preset == 0.0) {
|
Chris@23
|
294 m_tuneLocal = false;
|
Chris@23
|
295 m_paling = 1.0;
|
Chris@23
|
296 m_dictID = 0.0;
|
Chris@23
|
297 }
|
Chris@23
|
298 if (m_preset == 1.0) {
|
Chris@23
|
299 m_tuneLocal = false;
|
Chris@23
|
300 m_paling = 1.0;
|
Chris@23
|
301 m_dictID = 1.0;
|
Chris@23
|
302 }
|
Chris@23
|
303 if (m_preset == 2.0) {
|
Chris@23
|
304 m_tuneLocal = false;
|
Chris@23
|
305 m_paling = 0.7;
|
Chris@23
|
306 m_dictID = 0.0;
|
Chris@23
|
307 }
|
matthiasm@3
|
308 }
|
Chris@23
|
309 if (identifier == "chromanormalize") {
|
Chris@23
|
310 m_doNormalizeChroma = value;
|
Chris@23
|
311 }
|
matthiasm@17
|
312
|
Chris@23
|
313 if (identifier == "rollon") {
|
Chris@23
|
314 m_rollon = value;
|
Chris@23
|
315 }
|
matthiasm@0
|
316 }
|
matthiasm@0
|
317
|
matthiasm@0
|
318 NNLSChroma::ProgramList
|
matthiasm@0
|
319 NNLSChroma::getPrograms() const
|
matthiasm@0
|
320 {
|
Chris@23
|
321 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
322 ProgramList list;
|
matthiasm@0
|
323
|
matthiasm@0
|
324 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
325 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
326
|
matthiasm@0
|
327 return list;
|
matthiasm@0
|
328 }
|
matthiasm@0
|
329
|
matthiasm@0
|
330 string
|
matthiasm@0
|
331 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
332 {
|
Chris@23
|
333 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
334 return ""; // no programs
|
matthiasm@0
|
335 }
|
matthiasm@0
|
336
|
matthiasm@0
|
337 void
|
matthiasm@0
|
338 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
339 {
|
Chris@23
|
340 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
341 }
|
matthiasm@0
|
342
|
matthiasm@0
|
343
|
matthiasm@0
|
344 NNLSChroma::OutputList
|
matthiasm@0
|
345 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
346 {
|
Chris@23
|
347 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
348 OutputList list;
|
matthiasm@0
|
349
|
matthiasm@0
|
350 // Make chroma names for the binNames property
|
matthiasm@0
|
351 vector<string> chromanames;
|
matthiasm@0
|
352 vector<string> bothchromanames;
|
matthiasm@0
|
353 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
354 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
355 if (iNote < 12) {
|
matthiasm@0
|
356 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
357 }
|
matthiasm@0
|
358 }
|
matthiasm@0
|
359
|
Chris@23
|
360 // int nNote = 84;
|
matthiasm@0
|
361
|
matthiasm@0
|
362 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
363 // Every plugin must have at least one output.
|
matthiasm@0
|
364
|
matthiasm@0
|
365 OutputDescriptor d0;
|
matthiasm@0
|
366 d0.identifier = "tuning";
|
matthiasm@0
|
367 d0.name = "Tuning";
|
matthiasm@0
|
368 d0.description = "The concert pitch.";
|
matthiasm@0
|
369 d0.unit = "Hz";
|
matthiasm@0
|
370 d0.hasFixedBinCount = true;
|
matthiasm@0
|
371 d0.binCount = 0;
|
matthiasm@0
|
372 d0.hasKnownExtents = true;
|
Chris@23
|
373 d0.minValue = 427.47;
|
Chris@23
|
374 d0.maxValue = 452.89;
|
matthiasm@0
|
375 d0.isQuantized = false;
|
matthiasm@0
|
376 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
377 d0.hasDuration = false;
|
matthiasm@0
|
378 list.push_back(d0);
|
matthiasm@0
|
379
|
Chris@23
|
380 OutputDescriptor d1;
|
matthiasm@0
|
381 d1.identifier = "logfreqspec";
|
matthiasm@0
|
382 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
383 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
384 d1.unit = "";
|
matthiasm@0
|
385 d1.hasFixedBinCount = true;
|
matthiasm@0
|
386 d1.binCount = nNote;
|
matthiasm@0
|
387 d1.hasKnownExtents = false;
|
matthiasm@0
|
388 d1.isQuantized = false;
|
matthiasm@0
|
389 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
390 d1.hasDuration = false;
|
matthiasm@0
|
391 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
392 list.push_back(d1);
|
matthiasm@0
|
393
|
Chris@23
|
394 OutputDescriptor d2;
|
matthiasm@0
|
395 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
396 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
397 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
398 d2.unit = "";
|
matthiasm@0
|
399 d2.hasFixedBinCount = true;
|
matthiasm@0
|
400 d2.binCount = 256;
|
matthiasm@0
|
401 d2.hasKnownExtents = false;
|
matthiasm@0
|
402 d2.isQuantized = false;
|
matthiasm@0
|
403 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
404 d2.hasDuration = false;
|
matthiasm@0
|
405 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
406 list.push_back(d2);
|
matthiasm@0
|
407
|
matthiasm@0
|
408 OutputDescriptor d3;
|
matthiasm@0
|
409 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
410 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
411 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
412 d3.unit = "";
|
matthiasm@0
|
413 d3.hasFixedBinCount = true;
|
matthiasm@0
|
414 d3.binCount = 84;
|
matthiasm@0
|
415 d3.hasKnownExtents = false;
|
matthiasm@0
|
416 d3.isQuantized = false;
|
matthiasm@0
|
417 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
418 d3.hasDuration = false;
|
matthiasm@0
|
419 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
420 list.push_back(d3);
|
matthiasm@0
|
421
|
matthiasm@0
|
422 OutputDescriptor d4;
|
matthiasm@0
|
423 d4.identifier = "chroma";
|
matthiasm@0
|
424 d4.name = "Chromagram";
|
matthiasm@0
|
425 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
426 d4.unit = "";
|
matthiasm@0
|
427 d4.hasFixedBinCount = true;
|
matthiasm@0
|
428 d4.binCount = 12;
|
matthiasm@0
|
429 d4.binNames = chromanames;
|
matthiasm@0
|
430 d4.hasKnownExtents = false;
|
matthiasm@0
|
431 d4.isQuantized = false;
|
matthiasm@0
|
432 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
433 d4.hasDuration = false;
|
matthiasm@0
|
434 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
435 list.push_back(d4);
|
matthiasm@0
|
436
|
matthiasm@0
|
437 OutputDescriptor d5;
|
matthiasm@0
|
438 d5.identifier = "basschroma";
|
matthiasm@0
|
439 d5.name = "Bass Chromagram";
|
matthiasm@0
|
440 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
441 d5.unit = "";
|
matthiasm@0
|
442 d5.hasFixedBinCount = true;
|
matthiasm@0
|
443 d5.binCount = 12;
|
matthiasm@0
|
444 d5.binNames = chromanames;
|
matthiasm@0
|
445 d5.hasKnownExtents = false;
|
matthiasm@0
|
446 d5.isQuantized = false;
|
matthiasm@0
|
447 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
448 d5.hasDuration = false;
|
matthiasm@0
|
449 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
450 list.push_back(d5);
|
matthiasm@0
|
451
|
matthiasm@0
|
452 OutputDescriptor d6;
|
matthiasm@0
|
453 d6.identifier = "bothchroma";
|
matthiasm@0
|
454 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
455 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
456 d6.unit = "";
|
matthiasm@0
|
457 d6.hasFixedBinCount = true;
|
matthiasm@0
|
458 d6.binCount = 24;
|
matthiasm@0
|
459 d6.binNames = bothchromanames;
|
matthiasm@0
|
460 d6.hasKnownExtents = false;
|
matthiasm@0
|
461 d6.isQuantized = false;
|
matthiasm@0
|
462 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
463 d6.hasDuration = false;
|
matthiasm@0
|
464 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
465 list.push_back(d6);
|
matthiasm@0
|
466
|
matthiasm@0
|
467 OutputDescriptor d7;
|
matthiasm@0
|
468 d7.identifier = "simplechord";
|
matthiasm@0
|
469 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
470 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
471 d7.unit = "";
|
matthiasm@0
|
472 d7.hasFixedBinCount = true;
|
matthiasm@0
|
473 d7.binCount = 0;
|
matthiasm@0
|
474 d7.hasKnownExtents = false;
|
matthiasm@0
|
475 d7.isQuantized = false;
|
matthiasm@0
|
476 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
477 d7.hasDuration = false;
|
matthiasm@0
|
478 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
479 list.push_back(d7);
|
matthiasm@0
|
480
|
Chris@23
|
481 //
|
Chris@23
|
482 // OutputDescriptor d9;
|
Chris@23
|
483 // d9.identifier = "inconsistencysegment";
|
Chris@23
|
484 // d9.name = "Harmonic inconsistency segmenter";
|
Chris@23
|
485 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
Chris@23
|
486 // d9.unit = "";
|
Chris@23
|
487 // d9.hasFixedBinCount = true;
|
Chris@23
|
488 // d9.binCount = 0;
|
Chris@23
|
489 // d9.hasKnownExtents = true;
|
Chris@23
|
490 // d9.minValue = 0.1;
|
Chris@23
|
491 // d9.maxValue = 0.9;
|
Chris@23
|
492 // d9.isQuantized = false;
|
Chris@23
|
493 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@23
|
494 // d9.hasDuration = false;
|
Chris@23
|
495 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
Chris@23
|
496 // list.push_back(d9);
|
Chris@23
|
497 //
|
Chris@23
|
498 OutputDescriptor d10;
|
Chris@23
|
499 d10.identifier = "localtuning";
|
Chris@23
|
500 d10.name = "Local tuning";
|
Chris@23
|
501 d10.description = "Tuning based on the history up to this timestamp.";
|
Chris@23
|
502 d10.unit = "Hz";
|
Chris@23
|
503 d10.hasFixedBinCount = true;
|
Chris@23
|
504 d10.binCount = 1;
|
Chris@23
|
505 d10.hasKnownExtents = true;
|
Chris@23
|
506 d10.minValue = 427.47;
|
Chris@23
|
507 d10.maxValue = 452.89;
|
Chris@23
|
508 d10.isQuantized = false;
|
Chris@23
|
509 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@23
|
510 d10.hasDuration = false;
|
Chris@23
|
511 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
Chris@23
|
512 list.push_back(d10);
|
matthiasm@17
|
513
|
Chris@23
|
514 OutputDescriptor d8;
|
matthiasm@17
|
515 d8.identifier = "harmonicchange";
|
matthiasm@17
|
516 d8.name = "Harmonic change value";
|
matthiasm@17
|
517 d8.description = "Harmonic change.";
|
matthiasm@17
|
518 d8.unit = "";
|
matthiasm@17
|
519 d8.hasFixedBinCount = true;
|
matthiasm@17
|
520 d8.binCount = 1;
|
matthiasm@17
|
521 d8.hasKnownExtents = true;
|
Chris@23
|
522 d8.minValue = 0.0;
|
Chris@23
|
523 d8.maxValue = 0.999;
|
matthiasm@17
|
524 d8.isQuantized = false;
|
matthiasm@17
|
525 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
526 d8.hasDuration = false;
|
matthiasm@17
|
527 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
528 list.push_back(d8);
|
matthiasm@1
|
529
|
matthiasm@0
|
530 return list;
|
matthiasm@0
|
531 }
|
matthiasm@0
|
532
|
matthiasm@0
|
533
|
matthiasm@0
|
534 bool
|
matthiasm@0
|
535 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
536 {
|
Chris@23
|
537 if (debug_on) {
|
Chris@23
|
538 cerr << "--> initialise";
|
Chris@23
|
539 }
|
matthiasm@1
|
540
|
matthiasm@0
|
541 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
542 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
543 m_blockSize = blockSize;
|
matthiasm@0
|
544 m_stepSize = stepSize;
|
matthiasm@0
|
545 frameCount = 0;
|
Chris@23
|
546 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
547 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
548 float *tempkernel;
|
matthiasm@1
|
549
|
Chris@23
|
550 tempkernel = new float[tempn];
|
matthiasm@1
|
551
|
Chris@23
|
552 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
553 m_kernelValue.clear();
|
Chris@23
|
554 m_kernelFftIndex.clear();
|
Chris@23
|
555 m_kernelNoteIndex.clear();
|
Chris@23
|
556 int countNonzero = 0;
|
Chris@23
|
557 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
558 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
559 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
560 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
561 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
562 countNonzero++;
|
Chris@23
|
563 }
|
Chris@23
|
564 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
565 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
566 }
|
Chris@23
|
567 }
|
Chris@23
|
568 }
|
Chris@23
|
569 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
570 delete [] tempkernel;
|
Chris@23
|
571 ofstream myfile;
|
Chris@23
|
572 myfile.open ("matrix.txt");
|
matthiasm@3
|
573 // myfile << "Writing this to a file.\n";
|
Chris@23
|
574 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
575 myfile << m_dict[i] << endl;
|
Chris@23
|
576 }
|
matthiasm@3
|
577 myfile.close();
|
matthiasm@0
|
578 return true;
|
matthiasm@0
|
579 }
|
matthiasm@0
|
580
|
matthiasm@0
|
581 void
|
matthiasm@0
|
582 NNLSChroma::reset()
|
matthiasm@0
|
583 {
|
Chris@23
|
584 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
585
|
matthiasm@0
|
586 // Clear buffers, reset stored values, etc
|
Chris@23
|
587 frameCount = 0;
|
Chris@23
|
588 m_dictID = 0;
|
Chris@23
|
589 m_fl.clear();
|
Chris@23
|
590 m_meanTuning0 = 0;
|
Chris@23
|
591 m_meanTuning1 = 0;
|
Chris@23
|
592 m_meanTuning2 = 0;
|
Chris@23
|
593 m_localTuning0 = 0;
|
Chris@23
|
594 m_localTuning1 = 0;
|
Chris@23
|
595 m_localTuning2 = 0;
|
Chris@23
|
596 m_localTuning.clear();
|
matthiasm@0
|
597 }
|
matthiasm@0
|
598
|
matthiasm@0
|
599 NNLSChroma::FeatureSet
|
matthiasm@0
|
600 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
601 {
|
Chris@23
|
602 if (debug_on) cerr << "--> process" << endl;
|
Chris@23
|
603 frameCount++;
|
Chris@23
|
604 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
605
|
Chris@23
|
606 Feature f10; // local tuning
|
Chris@23
|
607 f10.hasTimestamp = true;
|
Chris@23
|
608 f10.timestamp = timestamp;
|
Chris@23
|
609 const float *fbuf = inputBuffers[0];
|
Chris@23
|
610 float energysum = 0;
|
Chris@23
|
611 // make magnitude
|
Chris@23
|
612 float maxmag = -10000;
|
Chris@23
|
613 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
614 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
615 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
616 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
617 if (m_rollon > 0) {
|
Chris@23
|
618 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
619 }
|
Chris@23
|
620 }
|
matthiasm@14
|
621
|
Chris@23
|
622 float cumenergy = 0;
|
Chris@23
|
623 if (m_rollon > 0) {
|
Chris@23
|
624 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
625 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
626 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
627 else break;
|
Chris@23
|
628 }
|
Chris@23
|
629 }
|
matthiasm@17
|
630
|
Chris@23
|
631 if (maxmag < 2) {
|
Chris@23
|
632 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
633 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
634 magnitude[iBin] = 0;
|
Chris@23
|
635 }
|
Chris@23
|
636 }
|
matthiasm@4
|
637
|
Chris@23
|
638 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
639 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
640 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
641 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
642 }
|
Chris@23
|
643 int binCount = 0;
|
Chris@23
|
644 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
645 // cerr << ".";
|
Chris@23
|
646 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
647 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
648 binCount++;
|
Chris@23
|
649 }
|
Chris@23
|
650 // cerr << nm[20];
|
Chris@23
|
651 // cerr << endl;
|
matthiasm@0
|
652
|
matthiasm@0
|
653
|
matthiasm@0
|
654 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
655 // update means of complex tuning variables
|
matthiasm@0
|
656 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
657 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
658 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
659
|
matthiasm@0
|
660 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
661 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
662 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
663 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
664 float ratioOld = 0.997;
|
matthiasm@3
|
665 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
666 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
667 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
668 }
|
matthiasm@0
|
669
|
matthiasm@0
|
670 // if (m_tuneLocal) {
|
Chris@23
|
671 // local tuning
|
Chris@23
|
672 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
673 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
674 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
675 m_localTuning.push_back(normalisedtuning);
|
Chris@23
|
676 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
Chris@23
|
677 f10.values.push_back(tuning440);
|
Chris@23
|
678 // cerr << tuning440 << endl;
|
matthiasm@0
|
679 // }
|
matthiasm@0
|
680
|
Chris@23
|
681 Feature f1; // logfreqspec
|
Chris@23
|
682 f1.hasTimestamp = true;
|
matthiasm@0
|
683 f1.timestamp = timestamp;
|
Chris@23
|
684 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
685 f1.values.push_back(nm[iNote]);
|
Chris@23
|
686 }
|
matthiasm@0
|
687
|
Chris@23
|
688 FeatureSet fs;
|
Chris@23
|
689 fs[1].push_back(f1);
|
matthiasm@3
|
690 fs[8].push_back(f10);
|
matthiasm@0
|
691
|
matthiasm@0
|
692 // deletes
|
matthiasm@0
|
693 delete[] magnitude;
|
matthiasm@0
|
694 delete[] nm;
|
matthiasm@0
|
695
|
matthiasm@0
|
696 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
Chris@23
|
697 char * pPath;
|
Chris@23
|
698 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
699
|
matthiasm@7
|
700
|
Chris@23
|
701 return fs;
|
matthiasm@0
|
702 }
|
matthiasm@0
|
703
|
matthiasm@0
|
704 NNLSChroma::FeatureSet
|
matthiasm@0
|
705 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
706 {
|
Chris@23
|
707 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
708 FeatureSet fsOut;
|
Chris@23
|
709 if (m_fl.size() == 0) return fsOut;
|
Chris@23
|
710 int nChord = m_chordnames.size();
|
Chris@23
|
711 //
|
Chris@23
|
712 /** Calculate Tuning
|
Chris@23
|
713 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
714 cumulative mean real and imag values)
|
Chris@23
|
715 **/
|
Chris@23
|
716 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
717 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
718 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
719 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
720 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
721 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
722
|
Chris@23
|
723 char buffer0 [50];
|
matthiasm@1
|
724
|
Chris@23
|
725 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
726
|
Chris@23
|
727 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
728
|
Chris@23
|
729 // push tuning to FeatureSet fsOut
|
Chris@23
|
730 Feature f0; // tuning
|
Chris@23
|
731 f0.hasTimestamp = true;
|
Chris@23
|
732 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
733 f0.label = buffer0;
|
Chris@23
|
734 fsOut[0].push_back(f0);
|
matthiasm@1
|
735
|
Chris@23
|
736 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
737 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
738 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
739 **/
|
Chris@23
|
740 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
741
|
Chris@23
|
742 float tempValue = 0;
|
Chris@23
|
743 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
744 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
745 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
746 int count = 0;
|
matthiasm@1
|
747
|
Chris@23
|
748 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
Chris@23
|
749 Feature f1 = *i;
|
Chris@23
|
750 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
751 f2.hasTimestamp = true;
|
Chris@23
|
752 f2.timestamp = f1.timestamp;
|
Chris@23
|
753 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
754
|
Chris@23
|
755 if (m_tuneLocal) {
|
Chris@23
|
756 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
757 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
758 }
|
matthiasm@1
|
759
|
Chris@23
|
760 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
761
|
Chris@23
|
762 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
763 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
764 f2.values.push_back(tempValue);
|
Chris@23
|
765 }
|
matthiasm@1
|
766
|
Chris@23
|
767 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
768 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
769 vector<float> runningstd;
|
Chris@23
|
770 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
771 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
772 }
|
Chris@23
|
773 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
774 for (int i = 0; i < 256; i++) {
|
Chris@23
|
775 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
776 if (runningstd[i] > 0) {
|
Chris@23
|
777 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
Chris@23
|
778 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
779 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
Chris@23
|
780 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
781 }
|
Chris@23
|
782 if (f2.values[i] < 0) {
|
Chris@23
|
783 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
784 }
|
Chris@23
|
785 }
|
Chris@23
|
786 fsOut[2].push_back(f2);
|
Chris@23
|
787 count++;
|
Chris@23
|
788 }
|
Chris@23
|
789 cerr << "done." << endl;
|
matthiasm@1
|
790
|
Chris@23
|
791 /** Semitone spectrum and chromagrams
|
Chris@23
|
792 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
793 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
794 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
795 bass and treble stacked onto each other).
|
Chris@23
|
796 **/
|
Chris@23
|
797 if (m_dictID == 1) {
|
Chris@23
|
798 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
799 } else {
|
Chris@23
|
800 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
801 }
|
matthiasm@13
|
802
|
matthiasm@1
|
803
|
Chris@23
|
804 vector<vector<float> > chordogram;
|
Chris@23
|
805 vector<vector<int> > scoreChordogram;
|
Chris@23
|
806 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
807 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
808 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
809 count = 0;
|
matthiasm@9
|
810
|
Chris@23
|
811 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
812 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
813 Feature f3; // semitone spectrum
|
Chris@23
|
814 Feature f4; // treble chromagram
|
Chris@23
|
815 Feature f5; // bass chromagram
|
Chris@23
|
816 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
817
|
Chris@23
|
818 f3.hasTimestamp = true;
|
Chris@23
|
819 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
820
|
Chris@23
|
821 f4.hasTimestamp = true;
|
Chris@23
|
822 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
823
|
Chris@23
|
824 f5.hasTimestamp = true;
|
Chris@23
|
825 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
826
|
Chris@23
|
827 f6.hasTimestamp = true;
|
Chris@23
|
828 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
829
|
Chris@29
|
830 float b[256];
|
matthiasm@1
|
831
|
Chris@23
|
832 bool some_b_greater_zero = false;
|
Chris@23
|
833 float sumb = 0;
|
Chris@23
|
834 for (int i = 0; i < 256; i++) {
|
Chris@23
|
835 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
836 b[i] = f2.values[i];
|
Chris@23
|
837 sumb += b[i];
|
Chris@23
|
838 if (b[i] > 0) {
|
Chris@23
|
839 some_b_greater_zero = true;
|
Chris@23
|
840 }
|
Chris@23
|
841 }
|
matthiasm@1
|
842
|
Chris@23
|
843 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
844
|
Chris@23
|
845 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
846 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
847 float currval;
|
Chris@23
|
848 unsigned iSemitone = 0;
|
matthiasm@1
|
849
|
Chris@23
|
850 if (some_b_greater_zero) {
|
Chris@23
|
851 if (m_dictID == 1) {
|
Chris@23
|
852 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
853 currval = 0;
|
Chris@23
|
854 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
855 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
856 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
857 f3.values.push_back(currval);
|
Chris@23
|
858 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
859 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
860 iSemitone++;
|
Chris@23
|
861 }
|
matthiasm@1
|
862
|
Chris@23
|
863 } else {
|
Chris@29
|
864 float x[84+1000];
|
Chris@23
|
865 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
866 vector<int> signifIndex;
|
Chris@23
|
867 int index=0;
|
Chris@23
|
868 sumb /= 84.0;
|
Chris@23
|
869 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
870 float currval = 0;
|
Chris@23
|
871 currval += b[iNote + 1 + -1];
|
Chris@23
|
872 currval += b[iNote + 1 + 0];
|
Chris@23
|
873 currval += b[iNote + 1 + 1];
|
Chris@23
|
874 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
875 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
876 index++;
|
Chris@23
|
877 }
|
Chris@29
|
878 float rnorm;
|
Chris@29
|
879 float w[84+1000];
|
Chris@29
|
880 float zz[84+1000];
|
Chris@23
|
881 int indx[84+1000];
|
Chris@23
|
882 int mode;
|
Chris@23
|
883 int dictsize = 256*signifIndex.size();
|
Chris@23
|
884 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@29
|
885 float *curr_dict = new float[dictsize];
|
Chris@23
|
886 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
887 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
888 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
889 }
|
Chris@23
|
890 }
|
Chris@29
|
891 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
892 delete [] curr_dict;
|
Chris@23
|
893 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
894 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
895 // cerr << mode << endl;
|
Chris@23
|
896 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
897 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
898 }
|
Chris@23
|
899 }
|
Chris@23
|
900 }
|
matthiasm@13
|
901
|
matthiasm@10
|
902
|
matthiasm@12
|
903
|
matthiasm@13
|
904
|
Chris@23
|
905 f4.values = chroma;
|
Chris@23
|
906 f5.values = basschroma;
|
Chris@23
|
907 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
908 f6.values = chroma;
|
matthiasm@1
|
909
|
Chris@23
|
910 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
911 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
912 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
913 case 0: // should never end up here
|
Chris@23
|
914 break;
|
Chris@23
|
915 case 1:
|
Chris@23
|
916 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
917 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
918 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
919 break;
|
Chris@23
|
920 case 2:
|
Chris@23
|
921 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
922 chromanorm[0] += *it;
|
Chris@23
|
923 }
|
Chris@23
|
924 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
925 chromanorm[1] += *it;
|
Chris@23
|
926 }
|
Chris@23
|
927 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
928 chromanorm[2] += *it;
|
Chris@23
|
929 }
|
Chris@23
|
930 break;
|
Chris@23
|
931 case 3:
|
Chris@23
|
932 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
933 chromanorm[0] += pow(*it,2);
|
Chris@23
|
934 }
|
Chris@23
|
935 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
936 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
937 chromanorm[1] += pow(*it,2);
|
Chris@23
|
938 }
|
Chris@23
|
939 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
940 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
941 chromanorm[2] += pow(*it,2);
|
Chris@23
|
942 }
|
Chris@23
|
943 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
944 break;
|
Chris@23
|
945 }
|
Chris@23
|
946 if (chromanorm[0] > 0) {
|
Chris@23
|
947 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
948 f4.values[i] /= chromanorm[0];
|
Chris@23
|
949 }
|
Chris@23
|
950 }
|
Chris@23
|
951 if (chromanorm[1] > 0) {
|
Chris@23
|
952 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
953 f5.values[i] /= chromanorm[1];
|
Chris@23
|
954 }
|
Chris@23
|
955 }
|
Chris@23
|
956 if (chromanorm[2] > 0) {
|
Chris@23
|
957 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
958 f6.values[i] /= chromanorm[2];
|
Chris@23
|
959 }
|
Chris@23
|
960 }
|
matthiasm@13
|
961
|
Chris@23
|
962 }
|
matthiasm@13
|
963
|
Chris@23
|
964 // local chord estimation
|
Chris@23
|
965 vector<float> currentChordSalience;
|
Chris@23
|
966 float tempchordvalue = 0;
|
Chris@23
|
967 float sumchordvalue = 0;
|
matthiasm@9
|
968
|
Chris@23
|
969 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
970 tempchordvalue = 0;
|
Chris@23
|
971 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
972 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
973 }
|
Chris@23
|
974 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
975 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
976 }
|
Chris@23
|
977 sumchordvalue+=tempchordvalue;
|
Chris@23
|
978 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
979 }
|
Chris@23
|
980 if (sumchordvalue > 0) {
|
Chris@23
|
981 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
982 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
983 }
|
Chris@23
|
984 } else {
|
Chris@23
|
985 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
986 }
|
Chris@23
|
987 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
988
|
Chris@23
|
989 fsOut[3].push_back(f3);
|
Chris@23
|
990 fsOut[4].push_back(f4);
|
Chris@23
|
991 fsOut[5].push_back(f5);
|
Chris@23
|
992 fsOut[6].push_back(f6);
|
Chris@23
|
993 count++;
|
Chris@23
|
994 }
|
Chris@23
|
995 cerr << "done." << endl;
|
matthiasm@13
|
996
|
matthiasm@10
|
997
|
Chris@23
|
998 /* Simple chord estimation
|
Chris@23
|
999 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
1000 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
1001 */
|
Chris@23
|
1002 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
1003 count = 0;
|
Chris@23
|
1004 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
1005 vector<int> chordSequence;
|
Chris@23
|
1006 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
1007 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
1008 scoreChordogram.push_back(temp);
|
Chris@23
|
1009 }
|
Chris@23
|
1010 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
1011 int startIndex = count + 1;
|
Chris@23
|
1012 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
1013
|
Chris@23
|
1014 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
1015
|
Chris@23
|
1016 vector<int> chordCandidates;
|
Chris@23
|
1017 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
1018 // float currsum = 0;
|
Chris@23
|
1019 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
1020 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
1021 // }
|
Chris@23
|
1022 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
1023 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
1024 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
1025 chordCandidates.push_back(iChord);
|
Chris@23
|
1026 break;
|
Chris@23
|
1027 }
|
Chris@23
|
1028 }
|
Chris@23
|
1029 }
|
Chris@23
|
1030 chordCandidates.push_back(nChord-1);
|
Chris@23
|
1031 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
1032
|
Chris@23
|
1033 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
1034 float maxindex = 0; //... and the index thereof
|
Chris@23
|
1035 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
1036 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
1037
|
Chris@23
|
1038 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
1039 // now find the max values on both sides of iWF
|
Chris@23
|
1040 // left side:
|
Chris@23
|
1041 float maxL = 0;
|
Chris@23
|
1042 unsigned maxindL = nChord-1;
|
Chris@23
|
1043 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
1044 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
1045 float currsum = 0;
|
Chris@23
|
1046 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
1047 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
1048 }
|
Chris@23
|
1049 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
1050 if (currsum > maxL) {
|
Chris@23
|
1051 maxL = currsum;
|
Chris@23
|
1052 maxindL = iChord;
|
Chris@23
|
1053 }
|
Chris@23
|
1054 }
|
Chris@23
|
1055 // right side:
|
Chris@23
|
1056 float maxR = 0;
|
Chris@23
|
1057 unsigned maxindR = nChord-1;
|
Chris@23
|
1058 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
1059 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
1060 float currsum = 0;
|
Chris@23
|
1061 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
1062 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
1063 }
|
Chris@23
|
1064 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
1065 if (currsum > maxR) {
|
Chris@23
|
1066 maxR = currsum;
|
Chris@23
|
1067 maxindR = iChord;
|
Chris@23
|
1068 }
|
Chris@23
|
1069 }
|
Chris@23
|
1070 if (maxL+maxR > maxval) {
|
Chris@23
|
1071 maxval = maxL+maxR;
|
Chris@23
|
1072 maxindex = iWF;
|
Chris@23
|
1073 bestchordL = maxindL;
|
Chris@23
|
1074 bestchordR = maxindR;
|
Chris@23
|
1075 }
|
matthiasm@3
|
1076
|
Chris@23
|
1077 }
|
Chris@23
|
1078 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
1079 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
1080 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
1081 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
1082 }
|
Chris@23
|
1083 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
1084 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
1085 }
|
Chris@23
|
1086 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
1087 count++;
|
Chris@23
|
1088 }
|
Chris@23
|
1089 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
1090 count = 0;
|
Chris@23
|
1091 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
1092 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
1093 float maxindex = 0; //... and the index thereof
|
Chris@23
|
1094 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
1095 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
1096 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
1097 maxindex = iChord;
|
Chris@23
|
1098 // cerr << iChord << endl;
|
Chris@23
|
1099 }
|
Chris@23
|
1100 }
|
Chris@23
|
1101 chordSequence.push_back(maxindex);
|
Chris@23
|
1102 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
1103 count++;
|
Chris@23
|
1104 }
|
Chris@23
|
1105 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
1106
|
matthiasm@3
|
1107
|
Chris@23
|
1108 // mode filter on chordSequence
|
Chris@23
|
1109 count = 0;
|
Chris@23
|
1110 string oldChord = "";
|
Chris@23
|
1111 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
1112 Feature f6 = *it;
|
Chris@23
|
1113 Feature f7; // chord estimate
|
Chris@23
|
1114 f7.hasTimestamp = true;
|
Chris@23
|
1115 f7.timestamp = f6.timestamp;
|
Chris@23
|
1116 Feature f8; // chord estimate
|
Chris@23
|
1117 f8.hasTimestamp = true;
|
Chris@23
|
1118 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
1119
|
Chris@23
|
1120 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
1121 int maxChordCount = 0;
|
Chris@23
|
1122 int maxChordIndex = nChord-1;
|
Chris@23
|
1123 string maxChord;
|
Chris@23
|
1124 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
1125 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
1126 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
1127 chordCount[chordSequence[i]]++;
|
Chris@23
|
1128 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
1129 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
1130 maxChordCount++;
|
Chris@23
|
1131 maxChordIndex = chordSequence[i];
|
Chris@23
|
1132 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
1133 }
|
Chris@23
|
1134 }
|
Chris@23
|
1135 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
1136 // cerr << maxChordIndex << endl;
|
Chris@23
|
1137 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
1138 // cerr << chordchange[count] << endl;
|
Chris@23
|
1139 fsOut[9].push_back(f8);
|
Chris@23
|
1140 if (oldChord != maxChord) {
|
Chris@23
|
1141 oldChord = maxChord;
|
matthiasm@3
|
1142
|
Chris@23
|
1143 // char buffer1 [50];
|
Chris@23
|
1144 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
1145 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
1146 // } else {
|
Chris@23
|
1147 // sprintf(buffer1, "N");
|
Chris@23
|
1148 // }
|
Chris@23
|
1149 // f7.label = buffer1;
|
Chris@23
|
1150 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
1151 fsOut[7].push_back(f7);
|
Chris@23
|
1152 }
|
Chris@23
|
1153 count++;
|
Chris@23
|
1154 }
|
Chris@23
|
1155 Feature f7; // last chord estimate
|
Chris@23
|
1156 f7.hasTimestamp = true;
|
Chris@23
|
1157 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
1158 f7.label = "N";
|
Chris@23
|
1159 fsOut[7].push_back(f7);
|
Chris@23
|
1160 cerr << "done." << endl;
|
Chris@23
|
1161 // // musicity
|
Chris@23
|
1162 // count = 0;
|
Chris@23
|
1163 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
1164 // vector<float> musicityValue;
|
Chris@23
|
1165 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1166 // Feature f4 = *it;
|
Chris@23
|
1167 //
|
Chris@23
|
1168 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1169 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1170 // float chromasum = 0;
|
Chris@23
|
1171 // float diffsum = 0;
|
Chris@23
|
1172 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
1173 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
1174 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
1175 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
1176 // }
|
Chris@23
|
1177 // }
|
Chris@23
|
1178 // diffsum /= chromasum;
|
Chris@23
|
1179 // musicityValue.push_back(diffsum);
|
Chris@23
|
1180 // count++;
|
Chris@23
|
1181 // }
|
Chris@23
|
1182 //
|
Chris@23
|
1183 // float musicityThreshold = 0.44;
|
Chris@23
|
1184 // if (m_stepSize == 4096) {
|
Chris@23
|
1185 // musicityThreshold = 0.74;
|
Chris@23
|
1186 // }
|
Chris@23
|
1187 // if (m_stepSize == 4410) {
|
Chris@23
|
1188 // musicityThreshold = 0.77;
|
Chris@23
|
1189 // }
|
Chris@23
|
1190 //
|
Chris@23
|
1191 // count = 0;
|
Chris@23
|
1192 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1193 // Feature f4 = *it;
|
Chris@23
|
1194 // Feature f8; // musicity
|
Chris@23
|
1195 // Feature f9; // musicity segmenter
|
Chris@23
|
1196 //
|
Chris@23
|
1197 // f8.hasTimestamp = true;
|
Chris@23
|
1198 // f8.timestamp = f4.timestamp;
|
Chris@23
|
1199 // f9.hasTimestamp = true;
|
Chris@23
|
1200 // f9.timestamp = f4.timestamp;
|
Chris@23
|
1201 //
|
Chris@23
|
1202 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1203 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1204 // int musicityCount = 0;
|
Chris@23
|
1205 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
1206 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1207 // }
|
Chris@23
|
1208 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1209 //
|
Chris@23
|
1210 // if (isSpeech) {
|
Chris@23
|
1211 // if (oldlabeltype != 2) {
|
Chris@23
|
1212 // f9.label = "Speech";
|
Chris@23
|
1213 // fsOut[9].push_back(f9);
|
Chris@23
|
1214 // oldlabeltype = 2;
|
Chris@23
|
1215 // }
|
Chris@23
|
1216 // } else {
|
Chris@23
|
1217 // if (oldlabeltype != 1) {
|
Chris@23
|
1218 // f9.label = "Music";
|
Chris@23
|
1219 // fsOut[9].push_back(f9);
|
Chris@23
|
1220 // oldlabeltype = 1;
|
Chris@23
|
1221 // }
|
Chris@23
|
1222 // }
|
Chris@23
|
1223 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1224 // fsOut[8].push_back(f8);
|
Chris@23
|
1225 // count++;
|
Chris@23
|
1226 // }
|
Chris@23
|
1227 return fsOut;
|
matthiasm@0
|
1228
|
matthiasm@0
|
1229 }
|
matthiasm@0
|
1230
|