comparison NNLSChroma.cpp @ 35:cf8898a0174c matthiasm-plugin

* Split out NNLSChroma plugin into three plugins (chroma, chordino, tuning) with a common base class. There's still quite a lot of duplication between the getRemainingFeatures functions. Also add copyright / copying headers, etc.
author Chris Cannam
date Fri, 22 Oct 2010 11:30:21 +0100
parents da3195577172
children 3c261b864e49
comparison
equal deleted inserted replaced
34:8edcf48f4031 35:cf8898a0174c
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 NNLS-Chroma / Chordino
5
6 Audio feature extraction plugins for chromagram and chord
7 estimation.
8
9 Centre for Digital Music, Queen Mary University of London.
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
11
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version. See the file
16 COPYING included with this distribution for more information.
17 */
2 18
3 #include "NNLSChroma.h" 19 #include "NNLSChroma.h"
4 20
5 #include "chromamethods.h" 21 #include "chromamethods.h"
6 22
13 const bool debug_on = false; 29 const bool debug_on = false;
14 30
15 const vector<float> hw(hammingwind, hammingwind+19); 31 const vector<float> hw(hammingwind, hammingwind+19);
16 32
17 NNLSChroma::NNLSChroma(float inputSampleRate) : 33 NNLSChroma::NNLSChroma(float inputSampleRate) :
18 Plugin(inputSampleRate), 34 NNLSBase(inputSampleRate)
19 m_fl(0),
20 m_blockSize(0),
21 m_stepSize(0),
22 m_lengthOfNoteIndex(0),
23 m_meanTuning0(0),
24 m_meanTuning1(0),
25 m_meanTuning2(0),
26 m_localTuning0(0),
27 m_localTuning1(0),
28 m_localTuning2(0),
29 m_paling(1.0),
30 m_preset(0.0),
31 m_localTuning(0),
32 m_kernelValue(0),
33 m_kernelFftIndex(0),
34 m_kernelNoteIndex(0),
35 m_dict(0),
36 m_tuneLocal(false),
37 m_dictID(0),
38 m_chorddict(0),
39 m_chordnames(0),
40 m_doNormalizeChroma(0),
41 m_rollon(0.01)
42 { 35 {
43 if (debug_on) cerr << "--> NNLSChroma" << endl; 36 if (debug_on) cerr << "--> NNLSChroma" << endl;
44 37 }
45 // make the *note* dictionary matrix
46 m_dict = new float[nNote * 84];
47 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
48 dictionaryMatrix(m_dict);
49
50 // get the *chord* dictionary from file (if the file exists)
51 m_chordnames = chordDictionary(&m_chorddict);
52 }
53
54 38
55 NNLSChroma::~NNLSChroma() 39 NNLSChroma::~NNLSChroma()
56 { 40 {
57 if (debug_on) cerr << "--> ~NNLSChroma" << endl; 41 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
58 delete [] m_dict;
59 // delete [] m_chorddict;
60 // delete m_chordnames;
61 } 42 }
62 43
63 string 44 string
64 NNLSChroma::getIdentifier() const 45 NNLSChroma::getIdentifier() const
65 { 46 {
75 } 56 }
76 57
77 string 58 string
78 NNLSChroma::getDescription() const 59 NNLSChroma::getDescription() const
79 { 60 {
80 // Return something helpful here!
81 if (debug_on) cerr << "--> getDescription" << endl; 61 if (debug_on) cerr << "--> getDescription" << endl;
82 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; 62 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
83 } 63 }
84
85 string
86 NNLSChroma::getMaker() const
87 {
88 if (debug_on) cerr << "--> getMaker" << endl;
89 // Your name here
90 return "Matthias Mauch";
91 }
92
93 int
94 NNLSChroma::getPluginVersion() const
95 {
96 if (debug_on) cerr << "--> getPluginVersion" << endl;
97 // Increment this each time you release a version that behaves
98 // differently from the previous one
99 return 1;
100 }
101
102 string
103 NNLSChroma::getCopyright() const
104 {
105 if (debug_on) cerr << "--> getCopyright" << endl;
106 // This function is not ideally named. It does not necessarily
107 // need to say who made the plugin -- getMaker does that -- but it
108 // should indicate the terms under which it is distributed. For
109 // example, "Copyright (year). All Rights Reserved", or "GPL"
110 return "Copyright (2010). All rights reserved.";
111 }
112
113 NNLSChroma::InputDomain
114 NNLSChroma::getInputDomain() const
115 {
116 if (debug_on) cerr << "--> getInputDomain" << endl;
117 return FrequencyDomain;
118 }
119
120 size_t
121 NNLSChroma::getPreferredBlockSize() const
122 {
123 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
124 return 16384; // 0 means "I can handle any block size"
125 }
126
127 size_t
128 NNLSChroma::getPreferredStepSize() const
129 {
130 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
131 return 2048; // 0 means "anything sensible"; in practice this
132 // means the same as the block size for TimeDomain
133 // plugins, or half of it for FrequencyDomain plugins
134 }
135
136 size_t
137 NNLSChroma::getMinChannelCount() const
138 {
139 if (debug_on) cerr << "--> getMinChannelCount" << endl;
140 return 1;
141 }
142
143 size_t
144 NNLSChroma::getMaxChannelCount() const
145 {
146 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
147 return 1;
148 }
149
150 NNLSChroma::ParameterList
151 NNLSChroma::getParameterDescriptors() const
152 {
153 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
154 ParameterList list;
155
156 ParameterDescriptor d3;
157 d3.identifier = "preset";
158 d3.name = "preset";
159 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
160 d3.unit = "";
161 d3.isQuantized = true;
162 d3.quantizeStep = 1;
163 d3.minValue = 0.0;
164 d3.maxValue = 3.0;
165 d3.defaultValue = 0.0;
166 d3.valueNames.push_back("polyphonic pop");
167 d3.valueNames.push_back("polyphonic pop (fast)");
168 d3.valueNames.push_back("solo keyboard");
169 d3.valueNames.push_back("manual");
170 list.push_back(d3);
171
172 ParameterDescriptor d5;
173 d5.identifier = "rollon";
174 d5.name = "spectral roll-on";
175 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
176 d5.unit = "";
177 d5.minValue = 0;
178 d5.maxValue = 1;
179 d5.defaultValue = 0;
180 d5.isQuantized = false;
181 list.push_back(d5);
182
183 // ParameterDescriptor d0;
184 // d0.identifier = "notedict";
185 // d0.name = "note dictionary";
186 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
187 // d0.unit = "";
188 // d0.minValue = 0;
189 // d0.maxValue = 1;
190 // d0.defaultValue = 0;
191 // d0.isQuantized = true;
192 // d0.valueNames.push_back("s = 0.6");
193 // d0.valueNames.push_back("no NNLS");
194 // d0.quantizeStep = 1.0;
195 // list.push_back(d0);
196
197 ParameterDescriptor d1;
198 d1.identifier = "tuningmode";
199 d1.name = "tuning mode";
200 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
201 d1.unit = "";
202 d1.minValue = 0;
203 d1.maxValue = 1;
204 d1.defaultValue = 0;
205 d1.isQuantized = true;
206 d1.valueNames.push_back("global tuning");
207 d1.valueNames.push_back("local tuning");
208 d1.quantizeStep = 1.0;
209 list.push_back(d1);
210
211 // ParameterDescriptor d2;
212 // d2.identifier = "paling";
213 // d2.name = "spectral paling";
214 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
215 // d2.unit = "";
216 // d2.isQuantized = true;
217 // // d2.quantizeStep = 0.1;
218 // d2.minValue = 0.0;
219 // d2.maxValue = 1.0;
220 // d2.defaultValue = 1.0;
221 // d2.isQuantized = false;
222 // list.push_back(d2);
223 ParameterDescriptor d4;
224 d4.identifier = "chromanormalize";
225 d4.name = "chroma normalization";
226 d4.description = "How shall the chroma vector be normalized?";
227 d4.unit = "";
228 d4.minValue = 0;
229 d4.maxValue = 3;
230 d4.defaultValue = 0;
231 d4.isQuantized = true;
232 d4.valueNames.push_back("none");
233 d4.valueNames.push_back("maximum norm");
234 d4.valueNames.push_back("L1 norm");
235 d4.valueNames.push_back("L2 norm");
236 d4.quantizeStep = 1.0;
237 list.push_back(d4);
238
239 return list;
240 }
241
242 float
243 NNLSChroma::getParameter(string identifier) const
244 {
245 if (debug_on) cerr << "--> getParameter" << endl;
246 if (identifier == "notedict") {
247 return m_dictID;
248 }
249
250 if (identifier == "paling") {
251 return m_paling;
252 }
253
254 if (identifier == "rollon") {
255 return m_rollon;
256 }
257
258 if (identifier == "tuningmode") {
259 if (m_tuneLocal) {
260 return 1.0;
261 } else {
262 return 0.0;
263 }
264 }
265 if (identifier == "preset") {
266 return m_preset;
267 }
268 if (identifier == "chromanormalize") {
269 return m_doNormalizeChroma;
270 }
271 return 0;
272
273 }
274
275 void
276 NNLSChroma::setParameter(string identifier, float value)
277 {
278 if (debug_on) cerr << "--> setParameter" << endl;
279 if (identifier == "notedict") {
280 m_dictID = (int) value;
281 }
282
283 if (identifier == "paling") {
284 m_paling = value;
285 }
286
287 if (identifier == "tuningmode") {
288 m_tuneLocal = (value > 0) ? true : false;
289 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
290 }
291 if (identifier == "preset") {
292 m_preset = value;
293 if (m_preset == 0.0) {
294 m_tuneLocal = false;
295 m_paling = 1.0;
296 m_dictID = 0.0;
297 }
298 if (m_preset == 1.0) {
299 m_tuneLocal = false;
300 m_paling = 1.0;
301 m_dictID = 1.0;
302 }
303 if (m_preset == 2.0) {
304 m_tuneLocal = false;
305 m_paling = 0.7;
306 m_dictID = 0.0;
307 }
308 }
309 if (identifier == "chromanormalize") {
310 m_doNormalizeChroma = value;
311 }
312
313 if (identifier == "rollon") {
314 m_rollon = value;
315 }
316 }
317
318 NNLSChroma::ProgramList
319 NNLSChroma::getPrograms() const
320 {
321 if (debug_on) cerr << "--> getPrograms" << endl;
322 ProgramList list;
323
324 // If you have no programs, return an empty list (or simply don't
325 // implement this function or getCurrentProgram/selectProgram)
326
327 return list;
328 }
329
330 string
331 NNLSChroma::getCurrentProgram() const
332 {
333 if (debug_on) cerr << "--> getCurrentProgram" << endl;
334 return ""; // no programs
335 }
336
337 void
338 NNLSChroma::selectProgram(string name)
339 {
340 if (debug_on) cerr << "--> selectProgram" << endl;
341 }
342
343 64
344 NNLSChroma::OutputList 65 NNLSChroma::OutputList
345 NNLSChroma::getOutputDescriptors() const 66 NNLSChroma::getOutputDescriptors() const
346 { 67 {
347 if (debug_on) cerr << "--> getOutputDescriptors" << endl; 68 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
355 if (iNote < 12) { 76 if (iNote < 12) {
356 chromanames.push_back(notenames[iNote]); 77 chromanames.push_back(notenames[iNote]);
357 } 78 }
358 } 79 }
359 80
360 // int nNote = 84; 81 int index = 0;
361 82
362 // See OutputDescriptor documentation for the possibilities here.
363 // Every plugin must have at least one output.
364
365 OutputDescriptor d0;
366 d0.identifier = "tuning";
367 d0.name = "Tuning";
368 d0.description = "The concert pitch.";
369 d0.unit = "Hz";
370 d0.hasFixedBinCount = true;
371 d0.binCount = 0;
372 d0.hasKnownExtents = true;
373 d0.minValue = 427.47;
374 d0.maxValue = 452.89;
375 d0.isQuantized = false;
376 d0.sampleType = OutputDescriptor::VariableSampleRate;
377 d0.hasDuration = false;
378 list.push_back(d0);
379
380 OutputDescriptor d1; 83 OutputDescriptor d1;
381 d1.identifier = "logfreqspec"; 84 d1.identifier = "logfreqspec";
382 d1.name = "Log-Frequency Spectrum"; 85 d1.name = "Log-Frequency Spectrum";
383 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; 86 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
384 d1.unit = ""; 87 d1.unit = "";
388 d1.isQuantized = false; 91 d1.isQuantized = false;
389 d1.sampleType = OutputDescriptor::FixedSampleRate; 92 d1.sampleType = OutputDescriptor::FixedSampleRate;
390 d1.hasDuration = false; 93 d1.hasDuration = false;
391 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 94 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
392 list.push_back(d1); 95 list.push_back(d1);
96 m_outputLogSpec = index++;
393 97
394 OutputDescriptor d2; 98 OutputDescriptor d2;
395 d2.identifier = "tunedlogfreqspec"; 99 d2.identifier = "tunedlogfreqspec";
396 d2.name = "Tuned Log-Frequency Spectrum"; 100 d2.name = "Tuned Log-Frequency Spectrum";
397 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; 101 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
402 d2.isQuantized = false; 106 d2.isQuantized = false;
403 d2.sampleType = OutputDescriptor::FixedSampleRate; 107 d2.sampleType = OutputDescriptor::FixedSampleRate;
404 d2.hasDuration = false; 108 d2.hasDuration = false;
405 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 109 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
406 list.push_back(d2); 110 list.push_back(d2);
111 m_outputTunedSpec = index++;
407 112
408 OutputDescriptor d3; 113 OutputDescriptor d3;
409 d3.identifier = "semitonespectrum"; 114 d3.identifier = "semitonespectrum";
410 d3.name = "Semitone Spectrum"; 115 d3.name = "Semitone Spectrum";
411 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum."; 116 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
416 d3.isQuantized = false; 121 d3.isQuantized = false;
417 d3.sampleType = OutputDescriptor::FixedSampleRate; 122 d3.sampleType = OutputDescriptor::FixedSampleRate;
418 d3.hasDuration = false; 123 d3.hasDuration = false;
419 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 124 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
420 list.push_back(d3); 125 list.push_back(d3);
126 m_outputSemiSpec = index++;
421 127
422 OutputDescriptor d4; 128 OutputDescriptor d4;
423 d4.identifier = "chroma"; 129 d4.identifier = "chroma";
424 d4.name = "Chromagram"; 130 d4.name = "Chromagram";
425 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range."; 131 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
431 d4.isQuantized = false; 137 d4.isQuantized = false;
432 d4.sampleType = OutputDescriptor::FixedSampleRate; 138 d4.sampleType = OutputDescriptor::FixedSampleRate;
433 d4.hasDuration = false; 139 d4.hasDuration = false;
434 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 140 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
435 list.push_back(d4); 141 list.push_back(d4);
142 m_outputChroma = index++;
436 143
437 OutputDescriptor d5; 144 OutputDescriptor d5;
438 d5.identifier = "basschroma"; 145 d5.identifier = "basschroma";
439 d5.name = "Bass Chromagram"; 146 d5.name = "Bass Chromagram";
440 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range."; 147 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
446 d5.isQuantized = false; 153 d5.isQuantized = false;
447 d5.sampleType = OutputDescriptor::FixedSampleRate; 154 d5.sampleType = OutputDescriptor::FixedSampleRate;
448 d5.hasDuration = false; 155 d5.hasDuration = false;
449 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 156 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
450 list.push_back(d5); 157 list.push_back(d5);
158 m_outputBassChroma = index++;
451 159
452 OutputDescriptor d6; 160 OutputDescriptor d6;
453 d6.identifier = "bothchroma"; 161 d6.identifier = "bothchroma";
454 d6.name = "Chromagram and Bass Chromagram"; 162 d6.name = "Chromagram and Bass Chromagram";
455 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription."; 163 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
461 d6.isQuantized = false; 169 d6.isQuantized = false;
462 d6.sampleType = OutputDescriptor::FixedSampleRate; 170 d6.sampleType = OutputDescriptor::FixedSampleRate;
463 d6.hasDuration = false; 171 d6.hasDuration = false;
464 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 172 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
465 list.push_back(d6); 173 list.push_back(d6);
466 174 m_outputBothChroma = index++;
467 OutputDescriptor d7;
468 d7.identifier = "simplechord";
469 d7.name = "Simple Chord Estimate";
470 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
471 d7.unit = "";
472 d7.hasFixedBinCount = true;
473 d7.binCount = 0;
474 d7.hasKnownExtents = false;
475 d7.isQuantized = false;
476 d7.sampleType = OutputDescriptor::VariableSampleRate;
477 d7.hasDuration = false;
478 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
479 list.push_back(d7);
480
481 //
482 // OutputDescriptor d9;
483 // d9.identifier = "inconsistencysegment";
484 // d9.name = "Harmonic inconsistency segmenter";
485 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
486 // d9.unit = "";
487 // d9.hasFixedBinCount = true;
488 // d9.binCount = 0;
489 // d9.hasKnownExtents = true;
490 // d9.minValue = 0.1;
491 // d9.maxValue = 0.9;
492 // d9.isQuantized = false;
493 // d9.sampleType = OutputDescriptor::VariableSampleRate;
494 // d9.hasDuration = false;
495 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
496 // list.push_back(d9);
497 //
498 OutputDescriptor d10;
499 d10.identifier = "localtuning";
500 d10.name = "Local tuning";
501 d10.description = "Tuning based on the history up to this timestamp.";
502 d10.unit = "Hz";
503 d10.hasFixedBinCount = true;
504 d10.binCount = 1;
505 d10.hasKnownExtents = true;
506 d10.minValue = 427.47;
507 d10.maxValue = 452.89;
508 d10.isQuantized = false;
509 d10.sampleType = OutputDescriptor::FixedSampleRate;
510 d10.hasDuration = false;
511 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
512 list.push_back(d10);
513
514 OutputDescriptor d8;
515 d8.identifier = "harmonicchange";
516 d8.name = "Harmonic change value";
517 d8.description = "Harmonic change.";
518 d8.unit = "";
519 d8.hasFixedBinCount = true;
520 d8.binCount = 1;
521 d8.hasKnownExtents = true;
522 d8.minValue = 0.0;
523 d8.maxValue = 0.999;
524 d8.isQuantized = false;
525 d8.sampleType = OutputDescriptor::FixedSampleRate;
526 d8.hasDuration = false;
527 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
528 list.push_back(d8);
529 175
530 return list; 176 return list;
531 } 177 }
532 178
533 179
535 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) 181 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
536 { 182 {
537 if (debug_on) { 183 if (debug_on) {
538 cerr << "--> initialise"; 184 cerr << "--> initialise";
539 } 185 }
540 186
541 if (channels < getMinChannelCount() || 187 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
542 channels > getMaxChannelCount()) return false; 188 return false;
543 m_blockSize = blockSize; 189 }
544 m_stepSize = stepSize; 190
545 frameCount = 0;
546 int tempn = 256 * m_blockSize/2;
547 // cerr << "length of tempkernel : " << tempn << endl;
548 float *tempkernel;
549
550 tempkernel = new float[tempn];
551
552 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
553 m_kernelValue.clear();
554 m_kernelFftIndex.clear();
555 m_kernelNoteIndex.clear();
556 int countNonzero = 0;
557 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
558 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
559 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
560 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
561 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
562 countNonzero++;
563 }
564 m_kernelFftIndex.push_back(iFFT);
565 m_kernelNoteIndex.push_back(iNote);
566 }
567 }
568 }
569 // cerr << "nonzero count : " << countNonzero << endl;
570 delete [] tempkernel;
571 ofstream myfile;
572 myfile.open ("matrix.txt");
573 // myfile << "Writing this to a file.\n";
574 for (int i = 0; i < nNote * 84; ++i) {
575 myfile << m_dict[i] << endl;
576 }
577 myfile.close();
578 return true; 191 return true;
579 } 192 }
580 193
581 void 194 void
582 NNLSChroma::reset() 195 NNLSChroma::reset()
583 { 196 {
584 if (debug_on) cerr << "--> reset"; 197 if (debug_on) cerr << "--> reset";
585 198 NNLSBase::reset();
586 // Clear buffers, reset stored values, etc
587 frameCount = 0;
588 m_dictID = 0;
589 m_fl.clear();
590 m_meanTuning0 = 0;
591 m_meanTuning1 = 0;
592 m_meanTuning2 = 0;
593 m_localTuning0 = 0;
594 m_localTuning1 = 0;
595 m_localTuning2 = 0;
596 m_localTuning.clear();
597 } 199 }
598 200
599 NNLSChroma::FeatureSet 201 NNLSChroma::FeatureSet
600 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 202 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
601 { 203 {
602 if (debug_on) cerr << "--> process" << endl; 204 if (debug_on) cerr << "--> process" << endl;
603 frameCount++; 205
604 float *magnitude = new float[m_blockSize/2]; 206 NNLSBase::baseProcess(inputBuffers, timestamp);
605
606 Feature f10; // local tuning
607 f10.hasTimestamp = true;
608 f10.timestamp = timestamp;
609 const float *fbuf = inputBuffers[0];
610 float energysum = 0;
611 // make magnitude
612 float maxmag = -10000;
613 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
614 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
615 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
616 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
617 if (m_rollon > 0) {
618 energysum += pow(magnitude[iBin],2);
619 }
620 }
621
622 float cumenergy = 0;
623 if (m_rollon > 0) {
624 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
625 cumenergy += pow(magnitude[iBin],2);
626 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
627 else break;
628 }
629 }
630
631 if (maxmag < 2) {
632 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
633 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
634 magnitude[iBin] = 0;
635 }
636 }
637
638 // note magnitude mapping using pre-calculated matrix
639 float *nm = new float[nNote]; // note magnitude
640 for (size_t iNote = 0; iNote < nNote; iNote++) {
641 nm[iNote] = 0; // initialise as 0
642 }
643 int binCount = 0;
644 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
645 // cerr << ".";
646 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
647 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
648 binCount++;
649 }
650 // cerr << nm[20];
651 // cerr << endl;
652
653
654 float one_over_N = 1.0/frameCount;
655 // update means of complex tuning variables
656 m_meanTuning0 *= float(frameCount-1)*one_over_N;
657 m_meanTuning1 *= float(frameCount-1)*one_over_N;
658 m_meanTuning2 *= float(frameCount-1)*one_over_N;
659
660 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
661 m_meanTuning0 += nm[iTone + 0]*one_over_N;
662 m_meanTuning1 += nm[iTone + 1]*one_over_N;
663 m_meanTuning2 += nm[iTone + 2]*one_over_N;
664 float ratioOld = 0.997;
665 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
666 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
667 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
668 }
669
670 // if (m_tuneLocal) {
671 // local tuning
672 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
673 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
674 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
675 m_localTuning.push_back(normalisedtuning);
676 float tuning440 = 440 * pow(2,normalisedtuning/12);
677 f10.values.push_back(tuning440);
678 // cerr << tuning440 << endl;
679 // }
680
681 Feature f1; // logfreqspec
682 f1.hasTimestamp = true;
683 f1.timestamp = timestamp;
684 for (size_t iNote = 0; iNote < nNote; iNote++) {
685 f1.values.push_back(nm[iNote]);
686 }
687 207
688 FeatureSet fs; 208 FeatureSet fs;
689 fs[1].push_back(f1); 209 fs[m_outputLogSpec].push_back(m_logSpectrum[m_logSpectrum.size()-1]);
690 fs[8].push_back(f10);
691
692 // deletes
693 delete[] magnitude;
694 delete[] nm;
695
696 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
697 char * pPath;
698 pPath = getenv ("VAMP_PATH");
699
700
701 return fs; 210 return fs;
702 } 211 }
703 212
704 NNLSChroma::FeatureSet 213 NNLSChroma::FeatureSet
705 NNLSChroma::getRemainingFeatures() 214 NNLSChroma::getRemainingFeatures()
706 { 215 {
707 if (debug_on) cerr << "--> getRemainingFeatures" << endl; 216 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
708 FeatureSet fsOut; 217 FeatureSet fsOut;
709 if (m_fl.size() == 0) return fsOut; 218 if (m_logSpectrum.size() == 0) return fsOut;
710 int nChord = m_chordnames.size();
711 // 219 //
712 /** Calculate Tuning 220 /** Calculate Tuning
713 calculate tuning using the angle of the complex number defined by the 221 calculate tuning using the angle of the complex number defined by the
714 cumulative mean real and imaginary values 222 cumulative mean real and imaginary values
715 **/ 223 **/
724 232
725 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); 233 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
726 234
727 // cerr << "normalisedtuning: " << normalisedtuning << '\n'; 235 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
728 236
729 // push tuning to FeatureSet fsOut
730 Feature f0; // tuning
731 f0.hasTimestamp = true;
732 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
733 f0.label = buffer0;
734 fsOut[0].push_back(f0);
735
736 /** Tune Log-Frequency Spectrogram 237 /** Tune Log-Frequency Spectrogram
737 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to 238 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
738 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). 239 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
739 **/ 240 **/
740 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; 241 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
743 float dbThreshold = 0; // relative to the background spectrum 244 float dbThreshold = 0; // relative to the background spectrum
744 float thresh = pow(10,dbThreshold/20); 245 float thresh = pow(10,dbThreshold/20);
745 // cerr << "tune local ? " << m_tuneLocal << endl; 246 // cerr << "tune local ? " << m_tuneLocal << endl;
746 int count = 0; 247 int count = 0;
747 248
748 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { 249 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
749 Feature f1 = *i; 250 Feature f1 = *i;
750 Feature f2; // tuned log-frequency spectrum 251 Feature f2; // tuned log-frequency spectrum
751 f2.hasTimestamp = true; 252 f2.hasTimestamp = true;
752 f2.timestamp = f1.timestamp; 253 f2.timestamp = f1.timestamp;
753 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero 254 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
781 } 282 }
782 if (f2.values[i] < 0) { 283 if (f2.values[i] < 0) {
783 cerr << "ERROR: negative value in logfreq spectrum" << endl; 284 cerr << "ERROR: negative value in logfreq spectrum" << endl;
784 } 285 }
785 } 286 }
786 fsOut[2].push_back(f2); 287 fsOut[m_outputTunedSpec].push_back(f2);
787 count++; 288 count++;
788 } 289 }
789 cerr << "done." << endl; 290 cerr << "done." << endl;
790 291
791 /** Semitone spectrum and chromagrams 292 /** Semitone spectrum and chromagrams
799 } else { 300 } else {
800 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; 301 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
801 } 302 }
802 303
803 304
804 vector<vector<float> > chordogram;
805 vector<vector<int> > scoreChordogram;
806 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
807 vector<float> oldchroma = vector<float>(12,0); 305 vector<float> oldchroma = vector<float>(12,0);
808 vector<float> oldbasschroma = vector<float>(12,0); 306 vector<float> oldbasschroma = vector<float>(12,0);
809 count = 0; 307 count = 0;
810 308
811 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { 309 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
825 f5.timestamp = f2.timestamp; 323 f5.timestamp = f2.timestamp;
826 324
827 f6.hasTimestamp = true; 325 f6.hasTimestamp = true;
828 f6.timestamp = f2.timestamp; 326 f6.timestamp = f2.timestamp;
829 327
830 float b[256]; 328 float b[256];
831 329
832 bool some_b_greater_zero = false; 330 bool some_b_greater_zero = false;
833 float sumb = 0; 331 float sumb = 0;
834 for (int i = 0; i < 256; i++) { 332 for (int i = 0; i < 256; i++) {
835 // b[i] = m_dict[(256 * count + i) % (256 * 84)]; 333 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
859 basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; 357 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
860 iSemitone++; 358 iSemitone++;
861 } 359 }
862 360
863 } else { 361 } else {
864 float x[84+1000]; 362 float x[84+1000];
865 for (int i = 1; i < 1084; ++i) x[i] = 1.0; 363 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
866 vector<int> signifIndex; 364 vector<int> signifIndex;
867 int index=0; 365 int index=0;
868 sumb /= 84.0; 366 sumb /= 84.0;
869 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { 367 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
870 float currval = 0; 368 float currval = 0;
871 currval += b[iNote + 1 + -1]; 369 currval += b[iNote + 1 + -1];
872 currval += b[iNote + 1 + 0]; 370 currval += b[iNote + 1 + 0];
873 currval += b[iNote + 1 + 1]; 371 currval += b[iNote + 1 + 1];
874 if (currval > 0) signifIndex.push_back(index); 372 if (currval > 0) signifIndex.push_back(index);
875 f3.values.push_back(0); // fill the values, change later 373 f3.values.push_back(0); // fill the values, change later
876 index++; 374 index++;
877 } 375 }
878 float rnorm; 376 float rnorm;
879 float w[84+1000]; 377 float w[84+1000];
880 float zz[84+1000]; 378 float zz[84+1000];
881 int indx[84+1000]; 379 int indx[84+1000];
882 int mode; 380 int mode;
883 int dictsize = 256*signifIndex.size(); 381 int dictsize = 256*signifIndex.size();
884 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; 382 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
885 float *curr_dict = new float[dictsize]; 383 float *curr_dict = new float[dictsize];
886 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { 384 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
887 for (unsigned iBin = 0; iBin < 256; iBin++) { 385 for (unsigned iBin = 0; iBin < 256; iBin++) {
888 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; 386 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
889 } 387 }
890 } 388 }
891 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); 389 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
892 delete [] curr_dict; 390 delete [] curr_dict;
893 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { 391 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
894 f3.values[signifIndex[iNote]] = x[iNote]; 392 f3.values[signifIndex[iNote]] = x[iNote];
895 // cerr << mode << endl; 393 // cerr << mode << endl;
896 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; 394 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
897 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; 395 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
898 } 396 }
899 } 397 }
900 } 398 }
901
902
903
904 399
905 f4.values = chroma; 400 f4.values = chroma;
906 f5.values = basschroma; 401 f5.values = basschroma;
907 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas 402 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
908 f6.values = chroma; 403 f6.values = chroma;
956 if (chromanorm[2] > 0) { 451 if (chromanorm[2] > 0) {
957 for (int i = 0; i < f6.values.size(); i++) { 452 for (int i = 0; i < f6.values.size(); i++) {
958 f6.values[i] /= chromanorm[2]; 453 f6.values[i] /= chromanorm[2];
959 } 454 }
960 } 455 }
961
962 } 456 }
963 457
964 // local chord estimation 458 fsOut[m_outputSemiSpec].push_back(f3);
965 vector<float> currentChordSalience; 459 fsOut[m_outputChroma].push_back(f4);
966 float tempchordvalue = 0; 460 fsOut[m_outputBassChroma].push_back(f5);
967 float sumchordvalue = 0; 461 fsOut[m_outputBothChroma].push_back(f6);
968
969 for (int iChord = 0; iChord < nChord; iChord++) {
970 tempchordvalue = 0;
971 for (int iBin = 0; iBin < 12; iBin++) {
972 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
973 }
974 for (int iBin = 12; iBin < 24; iBin++) {
975 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
976 }
977 sumchordvalue+=tempchordvalue;
978 currentChordSalience.push_back(tempchordvalue);
979 }
980 if (sumchordvalue > 0) {
981 for (int iChord = 0; iChord < nChord; iChord++) {
982 currentChordSalience[iChord] /= sumchordvalue;
983 }
984 } else {
985 currentChordSalience[nChord-1] = 1.0;
986 }
987 chordogram.push_back(currentChordSalience);
988
989 fsOut[3].push_back(f3);
990 fsOut[4].push_back(f4);
991 fsOut[5].push_back(f5);
992 fsOut[6].push_back(f6);
993 count++; 462 count++;
994 } 463 }
995 cerr << "done." << endl; 464 cerr << "done." << endl;
996 465
997
998 /* Simple chord estimation
999 I just take the local chord estimates ("currentChordSalience") and average them over time, then
1000 take the maximum. Very simple, don't do this at home...
1001 */
1002 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
1003 count = 0;
1004 int halfwindowlength = m_inputSampleRate / m_stepSize;
1005 vector<int> chordSequence;
1006 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
1007 vector<int> temp = vector<int>(nChord,0);
1008 scoreChordogram.push_back(temp);
1009 }
1010 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
1011 int startIndex = count + 1;
1012 int endIndex = count + 2 * halfwindowlength;
1013
1014 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
1015
1016 vector<int> chordCandidates;
1017 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
1018 // float currsum = 0;
1019 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
1020 // currsum += chordogram[iFrame][iChord];
1021 // }
1022 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
1023 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
1024 if (chordogram[iFrame][iChord] > chordThreshold) {
1025 chordCandidates.push_back(iChord);
1026 break;
1027 }
1028 }
1029 }
1030 chordCandidates.push_back(nChord-1);
1031 // cerr << chordCandidates.size() << endl;
1032
1033 float maxval = 0; // will be the value of the most salient *chord change* in this frame
1034 float maxindex = 0; //... and the index thereof
1035 unsigned bestchordL = nChord-1; // index of the best "left" chord
1036 unsigned bestchordR = nChord-1; // index of the best "right" chord
1037
1038 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
1039 // now find the max values on both sides of iWF
1040 // left side:
1041 float maxL = 0;
1042 unsigned maxindL = nChord-1;
1043 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
1044 unsigned iChord = chordCandidates[kChord];
1045 float currsum = 0;
1046 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
1047 currsum += chordogram[count+iFrame][iChord];
1048 }
1049 if (iChord == nChord-1) currsum *= 0.8;
1050 if (currsum > maxL) {
1051 maxL = currsum;
1052 maxindL = iChord;
1053 }
1054 }
1055 // right side:
1056 float maxR = 0;
1057 unsigned maxindR = nChord-1;
1058 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
1059 unsigned iChord = chordCandidates[kChord];
1060 float currsum = 0;
1061 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
1062 currsum += chordogram[count+iFrame][iChord];
1063 }
1064 if (iChord == nChord-1) currsum *= 0.8;
1065 if (currsum > maxR) {
1066 maxR = currsum;
1067 maxindR = iChord;
1068 }
1069 }
1070 if (maxL+maxR > maxval) {
1071 maxval = maxL+maxR;
1072 maxindex = iWF;
1073 bestchordL = maxindL;
1074 bestchordR = maxindR;
1075 }
1076
1077 }
1078 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
1079 // add a score to every chord-frame-point that was part of a maximum
1080 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
1081 scoreChordogram[iFrame+count][bestchordL]++;
1082 }
1083 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
1084 scoreChordogram[iFrame+count][bestchordR]++;
1085 }
1086 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
1087 count++;
1088 }
1089 // cerr << "******* agent finished *******" << endl;
1090 count = 0;
1091 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
1092 float maxval = 0; // will be the value of the most salient chord in this frame
1093 float maxindex = 0; //... and the index thereof
1094 for (unsigned iChord = 0; iChord < nChord; iChord++) {
1095 if (scoreChordogram[count][iChord] > maxval) {
1096 maxval = scoreChordogram[count][iChord];
1097 maxindex = iChord;
1098 // cerr << iChord << endl;
1099 }
1100 }
1101 chordSequence.push_back(maxindex);
1102 // cerr << "before modefilter, maxindex: " << maxindex << endl;
1103 count++;
1104 }
1105 // cerr << "******* mode filter done *******" << endl;
1106
1107
1108 // mode filter on chordSequence
1109 count = 0;
1110 string oldChord = "";
1111 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
1112 Feature f6 = *it;
1113 Feature f7; // chord estimate
1114 f7.hasTimestamp = true;
1115 f7.timestamp = f6.timestamp;
1116 Feature f8; // chord estimate
1117 f8.hasTimestamp = true;
1118 f8.timestamp = f6.timestamp;
1119
1120 vector<int> chordCount = vector<int>(nChord,0);
1121 int maxChordCount = 0;
1122 int maxChordIndex = nChord-1;
1123 string maxChord;
1124 int startIndex = max(count - halfwindowlength/2,0);
1125 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
1126 for (int i = startIndex; i < endIndex; i++) {
1127 chordCount[chordSequence[i]]++;
1128 if (chordCount[chordSequence[i]] > maxChordCount) {
1129 // cerr << "start index " << startIndex << endl;
1130 maxChordCount++;
1131 maxChordIndex = chordSequence[i];
1132 maxChord = m_chordnames[maxChordIndex];
1133 }
1134 }
1135 // chordSequence[count] = maxChordIndex;
1136 // cerr << maxChordIndex << endl;
1137 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
1138 // cerr << chordchange[count] << endl;
1139 fsOut[9].push_back(f8);
1140 if (oldChord != maxChord) {
1141 oldChord = maxChord;
1142
1143 // char buffer1 [50];
1144 // if (maxChordIndex < nChord - 1) {
1145 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
1146 // } else {
1147 // sprintf(buffer1, "N");
1148 // }
1149 // f7.label = buffer1;
1150 f7.label = m_chordnames[maxChordIndex];
1151 fsOut[7].push_back(f7);
1152 }
1153 count++;
1154 }
1155 Feature f7; // last chord estimate
1156 f7.hasTimestamp = true;
1157 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
1158 f7.label = "N";
1159 fsOut[7].push_back(f7);
1160 cerr << "done." << endl;
1161 // // musicity
1162 // count = 0;
1163 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
1164 // vector<float> musicityValue;
1165 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
1166 // Feature f4 = *it;
1167 //
1168 // int startIndex = max(count - musicitykernelwidth/2,0);
1169 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
1170 // float chromasum = 0;
1171 // float diffsum = 0;
1172 // for (int k = 0; k < 12; k++) {
1173 // for (int i = startIndex + 1; i < endIndex; i++) {
1174 // chromasum += pow(fsOut[4][i].values[k],2);
1175 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
1176 // }
1177 // }
1178 // diffsum /= chromasum;
1179 // musicityValue.push_back(diffsum);
1180 // count++;
1181 // }
1182 //
1183 // float musicityThreshold = 0.44;
1184 // if (m_stepSize == 4096) {
1185 // musicityThreshold = 0.74;
1186 // }
1187 // if (m_stepSize == 4410) {
1188 // musicityThreshold = 0.77;
1189 // }
1190 //
1191 // count = 0;
1192 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
1193 // Feature f4 = *it;
1194 // Feature f8; // musicity
1195 // Feature f9; // musicity segmenter
1196 //
1197 // f8.hasTimestamp = true;
1198 // f8.timestamp = f4.timestamp;
1199 // f9.hasTimestamp = true;
1200 // f9.timestamp = f4.timestamp;
1201 //
1202 // int startIndex = max(count - musicitykernelwidth/2,0);
1203 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
1204 // int musicityCount = 0;
1205 // for (int i = startIndex; i <= endIndex; i++) {
1206 // if (musicityValue[i] > musicityThreshold) musicityCount++;
1207 // }
1208 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
1209 //
1210 // if (isSpeech) {
1211 // if (oldlabeltype != 2) {
1212 // f9.label = "Speech";
1213 // fsOut[9].push_back(f9);
1214 // oldlabeltype = 2;
1215 // }
1216 // } else {
1217 // if (oldlabeltype != 1) {
1218 // f9.label = "Music";
1219 // fsOut[9].push_back(f9);
1220 // oldlabeltype = 1;
1221 // }
1222 // }
1223 // f8.values.push_back(musicityValue[count]);
1224 // fsOut[8].push_back(f8);
1225 // count++;
1226 // }
1227 return fsOut; 466 return fsOut;
1228 467
1229 } 468 }
1230 469