Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
matthiasm@0
|
19 #include "NNLSChroma.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
Chris@27
|
22
|
Chris@27
|
23 #include <cstdlib>
|
Chris@27
|
24 #include <fstream>
|
matthiasm@0
|
25 #include <cmath>
|
matthiasm@9
|
26
|
Chris@27
|
27 #include <algorithm>
|
matthiasm@0
|
28
|
matthiasm@0
|
// Compile-time switch for the "--> name" trace lines written to stderr by the
// methods in this file; the guarded statements are skipped while it is false.
static const bool debug_on = false;
|
matthiasm@0
|
30
|
matthiasm@0
|
31 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
Chris@35
|
32 NNLSBase(inputSampleRate)
|
matthiasm@0
|
33 {
|
Chris@23
|
34 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@0
|
35 }
|
matthiasm@0
|
36
|
matthiasm@0
|
37 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
38 {
|
Chris@23
|
39 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@0
|
40 }
|
matthiasm@0
|
41
|
matthiasm@0
|
42 string
|
matthiasm@0
|
43 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
44 {
|
Chris@23
|
45 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@46
|
46 return "nnls-chroma";
|
matthiasm@0
|
47 }
|
matthiasm@0
|
48
|
matthiasm@0
|
49 string
|
matthiasm@0
|
50 NNLSChroma::getName() const
|
matthiasm@0
|
51 {
|
Chris@23
|
52 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
53 return "NNLS Chroma";
|
matthiasm@0
|
54 }
|
matthiasm@0
|
55
|
matthiasm@0
|
56 string
|
matthiasm@0
|
57 NNLSChroma::getDescription() const
|
matthiasm@0
|
58 {
|
Chris@23
|
59 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
60 return "This plugin provides a number of features derived from a DFT-based log-frequency amplitude spectrum: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; and based on this semitone spectrum, different chroma features.";
|
matthiasm@0
|
61 }
|
matthiasm@0
|
62
|
matthiasm@0
|
63 NNLSChroma::OutputList
|
matthiasm@0
|
64 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
65 {
|
Chris@23
|
66 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
67 OutputList list;
|
matthiasm@0
|
68
|
matthiasm@0
|
69 // Make chroma names for the binNames property
|
matthiasm@120
|
70
|
matthiasm@120
|
71 const char* notenames[24] = {
|
matthiasm@120
|
72 "A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
|
matthiasm@120
|
73 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
|
matthiasm@120
|
74
|
matthiasm@120
|
75
|
matthiasm@0
|
76 vector<string> chromanames;
|
matthiasm@0
|
77 vector<string> bothchromanames;
|
matthiasm@0
|
78 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
79 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
80 if (iNote < 12) {
|
matthiasm@43
|
81 chromanames.push_back(notenames[iNote+12]);
|
matthiasm@0
|
82 }
|
matthiasm@0
|
83 }
|
matthiasm@0
|
84
|
Chris@35
|
85 int index = 0;
|
matthiasm@0
|
86
|
mail@117
|
87 OutputDescriptor logfreqspecOutput;
|
mail@117
|
88 logfreqspecOutput.identifier = "logfreqspec";
|
mail@117
|
89 logfreqspecOutput.name = "Log-Frequency Spectrum";
|
mail@117
|
90 logfreqspecOutput.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
mail@117
|
91 logfreqspecOutput.unit = "";
|
mail@117
|
92 logfreqspecOutput.hasFixedBinCount = true;
|
mail@117
|
93 logfreqspecOutput.binCount = nNote;
|
mail@117
|
94 logfreqspecOutput.hasKnownExtents = false;
|
mail@117
|
95 logfreqspecOutput.isQuantized = false;
|
mail@117
|
96 logfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@117
|
97 logfreqspecOutput.hasDuration = false;
|
mail@117
|
98 logfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@117
|
99 list.push_back(logfreqspecOutput);
|
mail@117
|
100 m_outputLogfreqspec = index++;
|
matthiasm@0
|
101
|
mail@117
|
102 OutputDescriptor tunedlogfreqspecOutput;
|
mail@117
|
103 tunedlogfreqspecOutput.identifier = "tunedlogfreqspec";
|
mail@117
|
104 tunedlogfreqspecOutput.name = "Tuned Log-Frequency Spectrum";
|
mail@117
|
105 tunedlogfreqspecOutput.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
mail@117
|
106 tunedlogfreqspecOutput.unit = "";
|
mail@117
|
107 tunedlogfreqspecOutput.hasFixedBinCount = true;
|
mail@117
|
108 tunedlogfreqspecOutput.binCount = nNote;
|
mail@117
|
109 tunedlogfreqspecOutput.hasKnownExtents = false;
|
mail@117
|
110 tunedlogfreqspecOutput.isQuantized = false;
|
mail@117
|
111 tunedlogfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@117
|
112 tunedlogfreqspecOutput.hasDuration = false;
|
mail@117
|
113 tunedlogfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@117
|
114 list.push_back(tunedlogfreqspecOutput);
|
mail@117
|
115 m_outputTunedlogfreqspec = index++;
|
matthiasm@0
|
116
|
mail@117
|
117 OutputDescriptor semitonespectrumOutput;
|
mail@117
|
118 semitonespectrumOutput.identifier = "semitonespectrum";
|
mail@117
|
119 semitonespectrumOutput.name = "Semitone Spectrum";
|
mail@117
|
120 semitonespectrumOutput.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
mail@117
|
121 semitonespectrumOutput.unit = "";
|
mail@117
|
122 semitonespectrumOutput.hasFixedBinCount = true;
|
mail@117
|
123 semitonespectrumOutput.binCount = 84;
|
mail@117
|
124 semitonespectrumOutput.hasKnownExtents = false;
|
mail@117
|
125 semitonespectrumOutput.isQuantized = false;
|
mail@117
|
126 semitonespectrumOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@117
|
127 semitonespectrumOutput.hasDuration = false;
|
mail@117
|
128 semitonespectrumOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@117
|
129 list.push_back(semitonespectrumOutput);
|
mail@117
|
130 m_outputSemitonespectrum = index++;
|
matthiasm@0
|
131
|
mail@117
|
132 OutputDescriptor chromaOutput;
|
mail@117
|
133 chromaOutput.identifier = "chroma";
|
mail@117
|
134 chromaOutput.name = "Chromagram";
|
mail@117
|
135 chromaOutput.description = "Tuning-adjusted chromagram from NNLS approximate transcription, with an emphasis on the medium note range.";
|
mail@117
|
136 chromaOutput.unit = "";
|
mail@117
|
137 chromaOutput.hasFixedBinCount = true;
|
mail@117
|
138 chromaOutput.binCount = 12;
|
mail@117
|
139 chromaOutput.binNames = chromanames;
|
mail@117
|
140 chromaOutput.hasKnownExtents = false;
|
mail@117
|
141 chromaOutput.isQuantized = false;
|
mail@117
|
142 chromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@117
|
143 chromaOutput.hasDuration = false;
|
mail@117
|
144 chromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@117
|
145 list.push_back(chromaOutput);
|
Chris@35
|
146 m_outputChroma = index++;
|
matthiasm@0
|
147
|
mail@116
|
148 OutputDescriptor basschromaOutput;
|
mail@116
|
149 basschromaOutput.identifier = "basschroma";
|
mail@116
|
150 basschromaOutput.name = "Bass Chromagram";
|
mail@116
|
151 basschromaOutput.description = "Tuning-adjusted bass chromagram from NNLS approximate transcription, with an emphasis on the bass note range.";
|
mail@116
|
152 basschromaOutput.unit = "";
|
mail@116
|
153 basschromaOutput.hasFixedBinCount = true;
|
mail@116
|
154 basschromaOutput.binCount = 12;
|
mail@116
|
155 basschromaOutput.binNames = chromanames;
|
mail@116
|
156 basschromaOutput.hasKnownExtents = false;
|
mail@116
|
157 basschromaOutput.isQuantized = false;
|
mail@116
|
158 basschromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@116
|
159 basschromaOutput.hasDuration = false;
|
mail@116
|
160 basschromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@116
|
161 list.push_back(basschromaOutput);
|
mail@117
|
162 m_outputBasschroma = index++;
|
matthiasm@0
|
163
|
mail@116
|
164 OutputDescriptor bothchromaOutput;
|
mail@116
|
165 bothchromaOutput.identifier = "bothchroma";
|
mail@116
|
166 bothchromaOutput.name = "Chromagram and Bass Chromagram";
|
mail@116
|
167 bothchromaOutput.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
|
mail@116
|
168 bothchromaOutput.unit = "";
|
mail@116
|
169 bothchromaOutput.hasFixedBinCount = true;
|
mail@116
|
170 bothchromaOutput.binCount = 24;
|
mail@116
|
171 bothchromaOutput.binNames = bothchromanames;
|
mail@116
|
172 bothchromaOutput.hasKnownExtents = false;
|
mail@116
|
173 bothchromaOutput.isQuantized = false;
|
mail@116
|
174 bothchromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@116
|
175 bothchromaOutput.hasDuration = false;
|
mail@116
|
176 bothchromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@116
|
177 list.push_back(bothchromaOutput);
|
mail@117
|
178 m_outputBothchroma = index++;
|
matthiasm@1
|
179
|
mail@116
|
180 OutputDescriptor consonanceOutput;
|
mail@116
|
181 consonanceOutput.identifier = "consonance";
|
mail@116
|
182 consonanceOutput.name = "Consonance estimate.";
|
mail@116
|
183 consonanceOutput.description = "A simple consonance value based on the convolution of a consonance profile with the semitone spectrum.";
|
mail@116
|
184 consonanceOutput.unit = "";
|
mail@116
|
185 consonanceOutput.hasFixedBinCount = true;
|
mail@116
|
186 consonanceOutput.binCount = 1;
|
mail@116
|
187 consonanceOutput.hasKnownExtents = false;
|
mail@116
|
188 consonanceOutput.isQuantized = false;
|
mail@116
|
189 consonanceOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@116
|
190 consonanceOutput.hasDuration = false;
|
mail@116
|
191 consonanceOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@116
|
192 list.push_back(consonanceOutput);
|
mail@83
|
193 m_outputConsonance = index++;
|
mail@83
|
194
|
mail@116
|
195 OutputDescriptor monophonicnessOutput;
|
mail@116
|
196 monophonicnessOutput.identifier = "monophonicness";
|
mail@116
|
197 monophonicnessOutput.name = "Monophonicness estimate.";
|
mail@116
|
198 monophonicnessOutput.description = ".";
|
mail@116
|
199 monophonicnessOutput.unit = "";
|
mail@116
|
200 monophonicnessOutput.hasFixedBinCount = true;
|
mail@116
|
201 monophonicnessOutput.binCount = 1;
|
mail@116
|
202 monophonicnessOutput.hasKnownExtents = true;
|
mail@116
|
203 monophonicnessOutput.minValue = 0;
|
mail@116
|
204 monophonicnessOutput.maxValue = 1;
|
mail@116
|
205 monophonicnessOutput.isQuantized = false;
|
mail@116
|
206 monophonicnessOutput.sampleType = OutputDescriptor::FixedSampleRate;
|
mail@116
|
207 monophonicnessOutput.hasDuration = false;
|
mail@116
|
208 monophonicnessOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
mail@116
|
209 list.push_back(monophonicnessOutput);
|
matthiasm@105
|
210 m_outputMonophonicness = index++;
|
matthiasm@105
|
211
|
matthiasm@0
|
212 return list;
|
matthiasm@0
|
213 }
|
matthiasm@0
|
214
|
matthiasm@0
|
215
|
matthiasm@0
|
216 bool
|
matthiasm@0
|
217 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
218 {
|
Chris@23
|
219 if (debug_on) {
|
Chris@23
|
220 cerr << "--> initialise";
|
Chris@23
|
221 }
|
matthiasm@1
|
222
|
Chris@35
|
223 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
224 return false;
|
Chris@35
|
225 }
|
matthiasm@1
|
226
|
matthiasm@0
|
227 return true;
|
matthiasm@0
|
228 }
|
matthiasm@0
|
229
|
matthiasm@0
|
230 void
|
matthiasm@0
|
231 NNLSChroma::reset()
|
matthiasm@0
|
232 {
|
Chris@23
|
233 if (debug_on) cerr << "--> reset";
|
Chris@35
|
234 NNLSBase::reset();
|
matthiasm@0
|
235 }
|
matthiasm@0
|
236
|
matthiasm@0
|
237 NNLSChroma::FeatureSet
|
matthiasm@0
|
238 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
239 {
|
Chris@23
|
240 if (debug_on) cerr << "--> process" << endl;
|
Chris@35
|
241
|
Chris@35
|
242 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
243
|
Chris@23
|
244 FeatureSet fs;
|
mail@117
|
245 fs[m_outputLogfreqspec].push_back(m_logSpectrum[m_logSpectrum.size()-1]);
|
Chris@23
|
246 return fs;
|
matthiasm@0
|
247 }
|
matthiasm@0
|
248
|
matthiasm@0
|
249 NNLSChroma::FeatureSet
|
matthiasm@0
|
250 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
251 {
|
mail@100
|
252 static const int nConsonance = 24;
|
mail@100
|
253 float consonancepattern[nConsonance] = {0,-1,-1,1,1,1,-1,1,1,1,-1,-1,1,-1,-1,1,1,1,-1,1,1,1,-1,-1};
|
mail@100
|
254 float consonancemean = 0;
|
mail@100
|
255 for (int i = 0; i< nConsonance; ++i) {
|
mail@100
|
256 consonancemean += consonancepattern[i]/nConsonance;
|
mail@100
|
257 }
|
mail@84
|
258
|
mail@100
|
259 for (int i = 0; i< nConsonance; ++i) {
|
mail@100
|
260 consonancepattern[i] -= consonancemean;
|
mail@100
|
261 }
|
mail@119
|
262
|
mail@119
|
263 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
264 FeatureSet fsOut;
|
Chris@35
|
265 if (m_logSpectrum.size() == 0) return fsOut;
|
mail@119
|
266
|
Chris@23
|
267 /** Calculate Tuning
|
Chris@23
|
268 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
269 cumulative mean real and imag values)
|
Chris@23
|
270 **/
|
mail@80
|
271 float meanTuningImag = 0;
|
mail@80
|
272 float meanTuningReal = 0;
|
mail@80
|
273 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
274 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
275 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
276 }
|
Chris@23
|
277 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
278 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
279 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
280 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
281
|
Chris@23
|
282 char buffer0 [50];
|
matthiasm@1
|
283
|
Chris@23
|
284 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
mail@119
|
285
|
Chris@23
|
286 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
287 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
288 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
289 **/
|
Chris@23
|
290 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
291
|
Chris@23
|
292 float tempValue = 0;
|
matthiasm@120
|
293
|
Chris@23
|
294 int count = 0;
|
mail@77
|
295
|
matthiasm@1
|
296
|
Chris@35
|
297 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@23
|
298 Feature f1 = *i;
|
Chris@23
|
299 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
300 f2.hasTimestamp = true;
|
Chris@23
|
301 f2.timestamp = f1.timestamp;
|
Chris@23
|
302 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
303
|
matthiasm@85
|
304
|
Chris@23
|
305 if (m_tuneLocal) {
|
Chris@23
|
306 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
307 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
308 }
|
matthiasm@1
|
309
|
mail@80
|
310 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
311
|
matthiasm@122
|
312 for (int k = 2; k < (int)f1.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@80
|
313 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
|
Chris@23
|
314 f2.values.push_back(tempValue);
|
Chris@23
|
315 }
|
matthiasm@1
|
316
|
Chris@23
|
317 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
mail@77
|
318
|
Chris@23
|
319 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
320 vector<float> runningstd;
|
mail@77
|
321 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
322 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
323 }
|
Chris@23
|
324 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
325 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
326 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
327 if (runningstd[i] > 0) {
|
Chris@23
|
328 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
mail@41
|
329 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
330 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
mail@41
|
331 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
332 }
|
Chris@23
|
333 if (f2.values[i] < 0) {
|
Chris@23
|
334 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
335 }
|
Chris@23
|
336 }
|
mail@117
|
337 fsOut[m_outputTunedlogfreqspec].push_back(f2);
|
Chris@23
|
338 count++;
|
Chris@23
|
339 }
|
Chris@23
|
340 cerr << "done." << endl;
|
matthiasm@1
|
341
|
Chris@23
|
342 /** Semitone spectrum and chromagrams
|
Chris@23
|
343 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
344 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
345 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
346 bass and treble stacked onto each other).
|
Chris@23
|
347 **/
|
matthiasm@42
|
348 if (m_useNNLS == 0) {
|
Chris@23
|
349 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
350 } else {
|
Chris@23
|
351 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
352 }
|
matthiasm@13
|
353
|
matthiasm@1
|
354
|
Chris@23
|
355 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
356 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
357 count = 0;
|
matthiasm@9
|
358
|
mail@117
|
359 for (FeatureList::iterator it = fsOut[m_outputTunedlogfreqspec].begin(); it != fsOut[m_outputTunedlogfreqspec].end(); ++it) {
|
Chris@23
|
360 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
361 Feature f3; // semitone spectrum
|
Chris@23
|
362 Feature f4; // treble chromagram
|
Chris@23
|
363 Feature f5; // bass chromagram
|
Chris@23
|
364 Feature f6; // treble and bass chromagram
|
matthiasm@85
|
365 Feature consonance;
|
matthiasm@105
|
366 Feature monophonicness;
|
matthiasm@85
|
367
|
Chris@23
|
368 f3.hasTimestamp = true;
|
Chris@23
|
369 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
370
|
Chris@23
|
371 f4.hasTimestamp = true;
|
Chris@23
|
372 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
373
|
Chris@23
|
374 f5.hasTimestamp = true;
|
Chris@23
|
375 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
376
|
Chris@23
|
377 f6.hasTimestamp = true;
|
Chris@23
|
378 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
379
|
matthiasm@85
|
380 consonance.hasTimestamp = true;
|
matthiasm@85
|
381 consonance.timestamp = f2.timestamp;
|
matthiasm@105
|
382
|
matthiasm@105
|
383 monophonicness.hasTimestamp = true;
|
matthiasm@105
|
384 monophonicness.timestamp = f2.timestamp;
|
matthiasm@85
|
385
|
mail@77
|
386 float b[nNote];
|
matthiasm@1
|
387
|
Chris@23
|
388 bool some_b_greater_zero = false;
|
Chris@23
|
389 float sumb = 0;
|
mail@77
|
390 for (int i = 0; i < nNote; i++) {
|
mail@77
|
391 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
Chris@23
|
392 b[i] = f2.values[i];
|
Chris@23
|
393 sumb += b[i];
|
Chris@23
|
394 if (b[i] > 0) {
|
Chris@23
|
395 some_b_greater_zero = true;
|
Chris@23
|
396 }
|
Chris@23
|
397 }
|
matthiasm@1
|
398
|
Chris@23
|
399 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
400
|
Chris@23
|
401 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
402 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
403 float currval;
|
matthiasm@122
|
404 int iSemitone = 0;
|
matthiasm@1
|
405
|
Chris@23
|
406 if (some_b_greater_zero) {
|
matthiasm@42
|
407 if (m_useNNLS == 0) {
|
matthiasm@122
|
408 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
409 currval = 0;
|
mail@80
|
410 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@80
|
411 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@80
|
412 }
|
Chris@23
|
413 f3.values.push_back(currval);
|
Chris@23
|
414 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
415 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
416 iSemitone++;
|
Chris@23
|
417 }
|
matthiasm@1
|
418
|
Chris@23
|
419 } else {
|
Chris@35
|
420 float x[84+1000];
|
Chris@23
|
421 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
422 vector<int> signifIndex;
|
Chris@23
|
423 int index=0;
|
Chris@23
|
424 sumb /= 84.0;
|
matthiasm@122
|
425 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
426 float currval = 0;
|
mail@80
|
427 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@80
|
428 currval += b[iNote + iBPS];
|
mail@80
|
429 }
|
Chris@23
|
430 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
431 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
432 index++;
|
Chris@23
|
433 }
|
Chris@35
|
434 float rnorm;
|
Chris@35
|
435 float w[84+1000];
|
Chris@35
|
436 float zz[84+1000];
|
Chris@23
|
437 int indx[84+1000];
|
Chris@23
|
438 int mode;
|
mail@77
|
439 int dictsize = nNote*signifIndex.size();
|
Chris@23
|
440 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
441 float *curr_dict = new float[dictsize];
|
Chris@91
|
442 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
443 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
444 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
445 }
|
Chris@23
|
446 }
|
Chris@35
|
447 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
448 delete [] curr_dict;
|
Chris@91
|
449 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
450 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
451 // cerr << mode << endl;
|
Chris@23
|
452 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
453 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
454 }
|
Chris@23
|
455 }
|
matthiasm@79
|
456 } else {
|
matthiasm@79
|
457 for (int i = 0; i < 84; ++i) f3.values.push_back(0);
|
Chris@23
|
458 }
|
matthiasm@85
|
459
|
matthiasm@85
|
460 float notesum = 0;
|
matthiasm@85
|
461
|
matthiasm@85
|
462 consonance.values.push_back(0);
|
matthiasm@104
|
463
|
matthiasm@105
|
464 float note_max = 0;
|
matthiasm@105
|
465 float note_runnerup = 0;
|
matthiasm@105
|
466 // float note_sum = 0;
|
matthiasm@105
|
467 for (int iSemitone = 0; iSemitone < 84; iSemitone++) {
|
matthiasm@105
|
468 float currvalue = f3.values[iSemitone] * treblewindow[iSemitone];
|
matthiasm@105
|
469 if (currvalue > note_max) {
|
matthiasm@105
|
470 note_runnerup = note_max;
|
matthiasm@105
|
471 note_max = currvalue;
|
matthiasm@105
|
472 } else if (currvalue > note_runnerup) {
|
matthiasm@105
|
473 note_runnerup = currvalue;
|
matthiasm@105
|
474 }
|
matthiasm@105
|
475 // note_sum += note[iPitchClass];
|
matthiasm@105
|
476 }
|
matthiasm@105
|
477 // float note_monophonicness = 12*note_max/(12*note_max+note_sum);
|
mail@111
|
478 // cerr << note_max << endl;
|
mail@111
|
479 // cerr << note_runnerup << endl << endl;
|
matthiasm@105
|
480 float note_monophonicness = 0.5;
|
matthiasm@105
|
481 if (note_max > 0) {
|
matthiasm@105
|
482 note_monophonicness = (note_max / (note_max+note_runnerup) - 0.5) * 2;
|
matthiasm@105
|
483 }
|
matthiasm@105
|
484 monophonicness.values.push_back(note_monophonicness);
|
matthiasm@105
|
485
|
matthiasm@104
|
486 for (int iSemitone = 0; iSemitone < 84; ++iSemitone) {
|
matthiasm@104
|
487 float tempconsonance = 0;
|
matthiasm@104
|
488 int sumlength = 1;
|
matthiasm@85
|
489 for (int jSemitone = 1; jSemitone < 24; ++jSemitone) {
|
matthiasm@104
|
490 if (iSemitone+jSemitone > 84-1) break;
|
matthiasm@104
|
491 sumlength++;
|
mail@100
|
492 tempconsonance += f3.values[iSemitone+jSemitone] * (consonancepattern[jSemitone]) * treblewindow[iSemitone+jSemitone];
|
matthiasm@85
|
493 }
|
matthiasm@104
|
494 notesum += f3.values[iSemitone] * f3.values[iSemitone] * treblewindow[iSemitone] * treblewindow[iSemitone] * sumlength;
|
matthiasm@104
|
495 consonance.values[0] += (f3.values[iSemitone] * tempconsonance * treblewindow[iSemitone]) * sumlength;
|
matthiasm@85
|
496 }
|
matthiasm@104
|
497 // cerr << consonance.values[0] << " " << f3.timestamp << " "<< notesum << endl;
|
matthiasm@86
|
498 if (notesum > 0) consonance.values[0] /= notesum;
|
matthiasm@104
|
499
|
matthiasm@85
|
500
|
Chris@23
|
501 f4.values = chroma;
|
Chris@23
|
502 f5.values = basschroma;
|
Chris@23
|
503 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
504 f6.values = chroma;
|
matthiasm@1
|
505
|
Chris@23
|
506 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
507 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
508 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
509 case 0: // should never end up here
|
Chris@23
|
510 break;
|
Chris@23
|
511 case 1:
|
Chris@23
|
512 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
513 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
514 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
515 break;
|
Chris@23
|
516 case 2:
|
Chris@23
|
517 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
518 chromanorm[0] += *it;
|
Chris@23
|
519 }
|
Chris@23
|
520 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
521 chromanorm[1] += *it;
|
Chris@23
|
522 }
|
Chris@23
|
523 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
524 chromanorm[2] += *it;
|
Chris@23
|
525 }
|
Chris@23
|
526 break;
|
Chris@23
|
527 case 3:
|
Chris@23
|
528 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
529 chromanorm[0] += pow(*it,2);
|
Chris@23
|
530 }
|
Chris@23
|
531 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
532 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
533 chromanorm[1] += pow(*it,2);
|
Chris@23
|
534 }
|
Chris@23
|
535 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
536 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
537 chromanorm[2] += pow(*it,2);
|
Chris@23
|
538 }
|
Chris@23
|
539 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
540 break;
|
Chris@23
|
541 }
|
Chris@23
|
542 if (chromanorm[0] > 0) {
|
matthiasm@122
|
543 for (int i = 0; i < (int)f4.values.size(); i++) {
|
Chris@23
|
544 f4.values[i] /= chromanorm[0];
|
Chris@23
|
545 }
|
Chris@23
|
546 }
|
Chris@23
|
547 if (chromanorm[1] > 0) {
|
matthiasm@122
|
548 for (int i = 0; i < (int)f5.values.size(); i++) {
|
Chris@23
|
549 f5.values[i] /= chromanorm[1];
|
Chris@23
|
550 }
|
Chris@23
|
551 }
|
Chris@23
|
552 if (chromanorm[2] > 0) {
|
matthiasm@122
|
553 for (int i = 0; i < (int)f6.values.size(); i++) {
|
Chris@23
|
554 f6.values[i] /= chromanorm[2];
|
Chris@23
|
555 }
|
Chris@23
|
556 }
|
Chris@23
|
557 }
|
matthiasm@13
|
558
|
mail@117
|
559 fsOut[m_outputSemitonespectrum].push_back(f3);
|
Chris@35
|
560 fsOut[m_outputChroma].push_back(f4);
|
mail@117
|
561 fsOut[m_outputBasschroma].push_back(f5);
|
mail@117
|
562 fsOut[m_outputBothchroma].push_back(f6);
|
matthiasm@85
|
563 fsOut[m_outputConsonance].push_back(consonance);
|
matthiasm@105
|
564 fsOut[m_outputMonophonicness].push_back(monophonicness);
|
Chris@23
|
565 count++;
|
Chris@23
|
566 }
|
Chris@23
|
567 cerr << "done." << endl;
|
matthiasm@10
|
568
|
Chris@23
|
569 return fsOut;
|
matthiasm@0
|
570
|
matthiasm@0
|
571 }
|
matthiasm@0
|
572
|