matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@0
|
6 #include <sstream>
|
matthiasm@0
|
7 #include <cassert>
|
matthiasm@0
|
8 #include <cstdio>
|
matthiasm@0
|
9 // #include "cblas.h"
|
matthiasm@0
|
10 #include "chorddict.cpp"
|
matthiasm@0
|
11 using namespace std;
|
matthiasm@0
|
12
|
matthiasm@0
|
// sin/cos constants (0.866025404 and -0.5); their use is outside this part of the file.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// 19-tap smoothing window (the name suggests a Hamming window).
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786,
    0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633,
    0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// 84-entry weighting table; only the first 37 entries are non-zero.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509,
    0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586,
    0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388,
    0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025,
    0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777,
    0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};

// 84-entry symmetric weighting table rising to ~1 in the middle.
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942,
    0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984,
    0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304,
    0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634,
    0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362,
    0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650,
    0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058,
    0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016,
    0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696,
    0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366,
    0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638,
    0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Bin labels: entries 0-11 are the "(bass)" names, entries 12-23 the plain names.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// hammingwind copied into a vector so it can be passed where a vector<float> is expected.
const std::vector<float> hw(hammingwind,
                            hammingwind + sizeof(hammingwind) / sizeof(hammingwind[0]));

// Number of log-frequency bins (rows of the kernel matrix built in initialise()).
const int nNote = 256;
|
matthiasm@0
|
22
|
matthiasm@0
|
23 /** Special Convolution
|
matthiasm@0
|
24 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
25 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
26 as the first (last) valid convolution bin.
|
matthiasm@0
|
27 **/
|
matthiasm@0
|
28
|
matthiasm@0
|
// Switch for the "--> name" trace messages written to stderr by the functions in this file.
const bool debug_on(false);
|
matthiasm@0
|
30
|
matthiasm@0
|
/**
 * Convolve `convolvee` with `kernel` and return a vector exactly as long as
 * `convolvee`.
 *
 * The central ("valid") part holds the ordinary convolution values; the
 * kernel.size()/2 bins at either end, where the kernel would overhang the
 * input, are filled with the first (respectively last) valid value.
 *
 * @param convolvee  signal to be smoothed
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() elements. If the input is shorter than
 *         the kernel (or the kernel is empty) there is no valid part and the
 *         result is all zeros.
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int half = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Sized from the input rather than a fixed 256, so longer inputs cannot
    // write past the end and shorter ones are not zero-extended.
    std::vector<float> Z(convolvee.size(), 0.0f);

    // Without at least one full kernel overlap there is nothing to compute
    // and the pad loops below would index out of range.
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: output bin n - half is centred under the kernel
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
|
matthiasm@0
|
58
|
matthiasm@0
|
59 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
60 // {
|
matthiasm@0
|
61 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
62 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
63 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
64 // }
|
matthiasm@0
|
65 // return freq;
|
matthiasm@0
|
66 // }
|
matthiasm@0
|
67
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   total width of the pulse
 * @return 0.5 + 0.5 * cos(2*pi*(x - centre)/width) when |x - centre| is at
 *         most width/2, otherwise 0. A width that is not positive yields 0.
 */
float cospuls(float x, float centre, float width)
{
    // A zero width would turn the reciprocal into infinity and the result
    // into NaN at x == centre; negative widths never matched anyway.
    if (!(width > 0.0f)) {
        return 0.0f;
    }

    const double pi = 3.14159265358979323846; // M_PI is not part of standard C++
    const float recipwidth = 1.0 / width;

    // std::fabs makes the floating-point overload explicit
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x - centre) * 2 * pi * recipwidth) * .5 + .5;
    }
    return 0.0;
}
|
matthiasm@0
|
76
|
matthiasm@0
|
77 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
78 {
|
matthiasm@0
|
79 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
80 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
81 // now scale to correct for note density
|
matthiasm@0
|
82 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
83 if (x > 0) {
|
matthiasm@0
|
84 out = out / (c * x);
|
matthiasm@0
|
85 } else {
|
matthiasm@0
|
86 out = 0;
|
matthiasm@0
|
87 }
|
matthiasm@0
|
88 return out;
|
matthiasm@0
|
89 }
|
matthiasm@0
|
90
|
matthiasm@0
|
91 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
92
|
matthiasm@0
|
93 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
94 int minoctave = 0; // this must be 0
|
matthiasm@0
|
95 int maxoctave = 7; // this must be 7
|
matthiasm@0
|
96 int oversampling = 20;
|
matthiasm@0
|
97
|
matthiasm@0
|
98 // linear frequency vector
|
matthiasm@0
|
99 vector<float> fft_f;
|
matthiasm@0
|
100 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
101 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
102 }
|
matthiasm@0
|
103 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
104
|
matthiasm@0
|
105 // linear oversampled frequency vector
|
matthiasm@0
|
106 vector<float> oversampled_f;
|
matthiasm@0
|
107 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
108 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
109 }
|
matthiasm@0
|
110
|
matthiasm@0
|
111 // pitch-spaced frequency vector
|
matthiasm@0
|
112 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
113 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
114 vector<float> cq_f;
|
matthiasm@0
|
115 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
116 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
117 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
118 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
119 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
120 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
121 }
|
matthiasm@0
|
122 }
|
matthiasm@0
|
123 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
124 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
125
|
matthiasm@0
|
126 // limit the linear vectors to the frequencies used
|
matthiasm@0
|
127 float maxfreq = *cq_f.end() * pow(2.0,0.083333);
|
matthiasm@0
|
128 while (*oversampled_f.end() > maxfreq) {
|
matthiasm@0
|
129 oversampled_f.pop_back();
|
matthiasm@0
|
130 }
|
matthiasm@0
|
131 while (*fft_f.end() > maxfreq) {
|
matthiasm@0
|
132 fft_f.pop_back();
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134
|
matthiasm@0
|
135 int nFFT = fft_f.size();
|
matthiasm@0
|
136
|
matthiasm@0
|
137 // for (int i = 0; i < 100; i++) {
|
matthiasm@0
|
138 // cerr << oversampled_f[i] << " " << cospuls(oversampled_f[i],fft_f[1],fft_width) << endl;
|
matthiasm@0
|
139 // }
|
matthiasm@0
|
140
|
matthiasm@0
|
141 vector<float> fft_activation;
|
matthiasm@0
|
142 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
143 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
144 fft_activation.push_back(cosp);
|
matthiasm@0
|
145 // cerr << cosp << endl;
|
matthiasm@0
|
146 }
|
matthiasm@0
|
147
|
matthiasm@0
|
148 float cq_activation;
|
matthiasm@0
|
149 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
150 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
151 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
152 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
153 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
154 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
155 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@0
|
156 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width/2 > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width/2 < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
157 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
158 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
159 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
160 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
161 }
|
matthiasm@0
|
162 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
163 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
164 // }
|
matthiasm@0
|
165 }
|
matthiasm@0
|
166 }
|
matthiasm@0
|
167 }
|
matthiasm@0
|
168 return true;
|
matthiasm@0
|
169 }
|
matthiasm@0
|
170
|
matthiasm@0
|
171
|
matthiasm@0
|
172 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
173 Plugin(inputSampleRate),
|
matthiasm@0
|
174 m_fl(0),
|
matthiasm@0
|
175 m_blockSize(0),
|
matthiasm@0
|
176 m_stepSize(0),
|
matthiasm@0
|
177 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
178 m_meanTuning0(0),
|
matthiasm@0
|
179 m_meanTuning1(0),
|
matthiasm@0
|
180 m_meanTuning2(0),
|
matthiasm@0
|
181 m_localTuning0(0),
|
matthiasm@0
|
182 m_localTuning1(0),
|
matthiasm@0
|
183 m_localTuning2(0),
|
matthiasm@0
|
184 m_paling(0),
|
matthiasm@0
|
185 m_localTuning(0),
|
matthiasm@0
|
186 m_kernelValue(0),
|
matthiasm@0
|
187 m_kernelFftIndex(0),
|
matthiasm@0
|
188 m_kernelNoteIndex(0),
|
matthiasm@0
|
189 m_tuneLocal(false),
|
matthiasm@0
|
190 m_dictID(0)
|
matthiasm@0
|
191 {
|
matthiasm@0
|
192 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@0
|
193 }
|
matthiasm@0
|
194
|
matthiasm@0
|
195
|
matthiasm@0
|
196 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
197 {
|
matthiasm@0
|
198 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@0
|
199 }
|
matthiasm@0
|
200
|
matthiasm@0
|
201 string
|
matthiasm@0
|
202 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
203 {
|
matthiasm@0
|
204 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
205 return "nnls_chroma";
|
matthiasm@0
|
206 }
|
matthiasm@0
|
207
|
matthiasm@0
|
208 string
|
matthiasm@0
|
209 NNLSChroma::getName() const
|
matthiasm@0
|
210 {
|
matthiasm@0
|
211 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
212 return "NNLS Chroma";
|
matthiasm@0
|
213 }
|
matthiasm@0
|
214
|
matthiasm@0
|
215 string
|
matthiasm@0
|
216 NNLSChroma::getDescription() const
|
matthiasm@0
|
217 {
|
matthiasm@0
|
218 // Return something helpful here!
|
matthiasm@0
|
219 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@0
|
220 return "";
|
matthiasm@0
|
221 }
|
matthiasm@0
|
222
|
matthiasm@0
|
223 string
|
matthiasm@0
|
224 NNLSChroma::getMaker() const
|
matthiasm@0
|
225 {
|
matthiasm@0
|
226 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
227 // Your name here
|
matthiasm@0
|
228 return "Matthias Mauch";
|
matthiasm@0
|
229 }
|
matthiasm@0
|
230
|
matthiasm@0
|
231 int
|
matthiasm@0
|
232 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
233 {
|
matthiasm@0
|
234 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
235 // Increment this each time you release a version that behaves
|
matthiasm@0
|
236 // differently from the previous one
|
matthiasm@0
|
237 return 1;
|
matthiasm@0
|
238 }
|
matthiasm@0
|
239
|
matthiasm@0
|
240 string
|
matthiasm@0
|
241 NNLSChroma::getCopyright() const
|
matthiasm@0
|
242 {
|
matthiasm@0
|
243 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
244 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
245 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
246 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
247 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
248 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
249 }
|
matthiasm@0
|
250
|
matthiasm@0
|
251 NNLSChroma::InputDomain
|
matthiasm@0
|
252 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
253 {
|
matthiasm@0
|
254 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
255 return FrequencyDomain;
|
matthiasm@0
|
256 }
|
matthiasm@0
|
257
|
matthiasm@0
|
258 size_t
|
matthiasm@0
|
259 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
260 {
|
matthiasm@0
|
261 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
262 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
263 }
|
matthiasm@0
|
264
|
matthiasm@0
|
265 size_t
|
matthiasm@0
|
266 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
267 {
|
matthiasm@0
|
268 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
269 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
270 // means the same as the block size for TimeDomain
|
matthiasm@0
|
271 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
272 }
|
matthiasm@0
|
273
|
matthiasm@0
|
274 size_t
|
matthiasm@0
|
275 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
276 {
|
matthiasm@0
|
277 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
278 return 1;
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
matthiasm@0
|
281 size_t
|
matthiasm@0
|
282 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
283 {
|
matthiasm@0
|
284 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
285 return 1;
|
matthiasm@0
|
286 }
|
matthiasm@0
|
287
|
matthiasm@0
|
288 NNLSChroma::ParameterList
|
matthiasm@0
|
289 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
290 {
|
matthiasm@0
|
291 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
292 ParameterList list;
|
matthiasm@0
|
293
|
matthiasm@0
|
294 ParameterDescriptor d0;
|
matthiasm@0
|
295 d0.identifier = "notedict";
|
matthiasm@0
|
296 d0.name = "note dictionary";
|
matthiasm@0
|
297 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@0
|
298 d0.unit = "";
|
matthiasm@0
|
299 d0.minValue = 0;
|
matthiasm@0
|
300 d0.maxValue = 2;
|
matthiasm@0
|
301 d0.defaultValue = 0;
|
matthiasm@0
|
302 d0.isQuantized = true;
|
matthiasm@0
|
303 d0.valueNames.push_back("s = 0.6");
|
matthiasm@0
|
304 d0.valueNames.push_back("s = 0.9");
|
matthiasm@0
|
305 d0.valueNames.push_back("s linearly spaced");
|
matthiasm@0
|
306 d0.valueNames.push_back("no NNLS");
|
matthiasm@0
|
307 d0.quantizeStep = 1.0;
|
matthiasm@0
|
308 list.push_back(d0);
|
matthiasm@0
|
309
|
matthiasm@0
|
310 ParameterDescriptor d1;
|
matthiasm@0
|
311 d1.identifier = "tuningmode";
|
matthiasm@0
|
312 d1.name = "tuning mode";
|
matthiasm@0
|
313 d1.description = "Tuning can be performed locally or on the whole extraction area.";
|
matthiasm@0
|
314 d1.unit = "";
|
matthiasm@0
|
315 d1.minValue = 0;
|
matthiasm@0
|
316 d1.maxValue = 1;
|
matthiasm@0
|
317 d1.defaultValue = 1;
|
matthiasm@0
|
318 d1.isQuantized = true;
|
matthiasm@0
|
319 d1.valueNames.push_back("global tuning");
|
matthiasm@0
|
320 d1.valueNames.push_back("local tuning");
|
matthiasm@0
|
321 d1.quantizeStep = 1.0;
|
matthiasm@0
|
322 list.push_back(d1);
|
matthiasm@0
|
323
|
matthiasm@0
|
324 ParameterDescriptor d2;
|
matthiasm@0
|
325 d2.identifier = "paling";
|
matthiasm@0
|
326 d2.name = "spectral paling";
|
matthiasm@0
|
327 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@0
|
328 d2.unit = "";
|
matthiasm@0
|
329 d2.minValue = 0;
|
matthiasm@0
|
330 d2.maxValue = 1;
|
matthiasm@0
|
331 d2.defaultValue = 0.5;
|
matthiasm@0
|
332 d2.isQuantized = false;
|
matthiasm@0
|
333 // d1.valueNames.push_back("global tuning");
|
matthiasm@0
|
334 // d1.valueNames.push_back("local tuning");
|
matthiasm@0
|
335 // d1.quantizeStep = 0.1;
|
matthiasm@0
|
336 list.push_back(d2);
|
matthiasm@0
|
337
|
matthiasm@0
|
338 return list;
|
matthiasm@0
|
339 }
|
matthiasm@0
|
340
|
matthiasm@0
|
341 float
|
matthiasm@0
|
342 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
343 {
|
matthiasm@0
|
344 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
345 if (identifier == "notedict") {
|
matthiasm@0
|
346 return m_dictID;
|
matthiasm@0
|
347 }
|
matthiasm@0
|
348
|
matthiasm@0
|
349 if (identifier == "paling") {
|
matthiasm@0
|
350 return m_paling;
|
matthiasm@0
|
351 }
|
matthiasm@0
|
352
|
matthiasm@0
|
353 if (identifier == "tuningmode") {
|
matthiasm@0
|
354 if (m_tuneLocal) {
|
matthiasm@0
|
355 return 1.0;
|
matthiasm@0
|
356 } else {
|
matthiasm@0
|
357 return 0.0;
|
matthiasm@0
|
358 }
|
matthiasm@0
|
359 }
|
matthiasm@0
|
360
|
matthiasm@0
|
361 return 0;
|
matthiasm@0
|
362
|
matthiasm@0
|
363 }
|
matthiasm@0
|
364
|
matthiasm@0
|
365 void
|
matthiasm@0
|
366 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
367 {
|
matthiasm@0
|
368 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
369 if (identifier == "notedict") {
|
matthiasm@0
|
370 m_dictID = (int) value;
|
matthiasm@0
|
371 }
|
matthiasm@0
|
372
|
matthiasm@0
|
373 if (identifier == "paling") {
|
matthiasm@0
|
374 m_paling = value;
|
matthiasm@0
|
375 }
|
matthiasm@0
|
376
|
matthiasm@0
|
377 if (identifier == "tuningmode") {
|
matthiasm@0
|
378 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
379 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
380 }
|
matthiasm@0
|
381 }
|
matthiasm@0
|
382
|
matthiasm@0
|
383 NNLSChroma::ProgramList
|
matthiasm@0
|
384 NNLSChroma::getPrograms() const
|
matthiasm@0
|
385 {
|
matthiasm@0
|
386 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
387 ProgramList list;
|
matthiasm@0
|
388
|
matthiasm@0
|
389 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
390 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
391
|
matthiasm@0
|
392 return list;
|
matthiasm@0
|
393 }
|
matthiasm@0
|
394
|
matthiasm@0
|
395 string
|
matthiasm@0
|
396 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
397 {
|
matthiasm@0
|
398 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
399 return ""; // no programs
|
matthiasm@0
|
400 }
|
matthiasm@0
|
401
|
matthiasm@0
|
402 void
|
matthiasm@0
|
403 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
404 {
|
matthiasm@0
|
405 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
406 }
|
matthiasm@0
|
407
|
matthiasm@0
|
408
|
matthiasm@0
|
409 NNLSChroma::OutputList
|
matthiasm@0
|
410 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
411 {
|
matthiasm@0
|
412 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
413 OutputList list;
|
matthiasm@0
|
414
|
matthiasm@0
|
415 // Make chroma names for the binNames property
|
matthiasm@0
|
416 vector<string> chromanames;
|
matthiasm@0
|
417 vector<string> bothchromanames;
|
matthiasm@0
|
418 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
419 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
420 if (iNote < 12) {
|
matthiasm@0
|
421 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
422 }
|
matthiasm@0
|
423 }
|
matthiasm@0
|
424
|
matthiasm@0
|
425 int nNote = 84;
|
matthiasm@0
|
426
|
matthiasm@0
|
427 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
428 // Every plugin must have at least one output.
|
matthiasm@0
|
429
|
matthiasm@0
|
430 OutputDescriptor d0;
|
matthiasm@0
|
431 d0.identifier = "tuning";
|
matthiasm@0
|
432 d0.name = "Tuning";
|
matthiasm@0
|
433 d0.description = "The concert pitch.";
|
matthiasm@0
|
434 d0.unit = "Hz";
|
matthiasm@0
|
435 d0.hasFixedBinCount = true;
|
matthiasm@0
|
436 d0.binCount = 0;
|
matthiasm@0
|
437 d0.hasKnownExtents = true;
|
matthiasm@0
|
438 d0.minValue = 427.47;
|
matthiasm@0
|
439 d0.maxValue = 452.89;
|
matthiasm@0
|
440 d0.isQuantized = false;
|
matthiasm@0
|
441 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
442 d0.hasDuration = false;
|
matthiasm@0
|
443 list.push_back(d0);
|
matthiasm@0
|
444
|
matthiasm@0
|
445 OutputDescriptor d1;
|
matthiasm@0
|
446 d1.identifier = "logfreqspec";
|
matthiasm@0
|
447 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
448 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
449 d1.unit = "";
|
matthiasm@0
|
450 d1.hasFixedBinCount = true;
|
matthiasm@0
|
451 d1.binCount = nNote;
|
matthiasm@0
|
452 d1.hasKnownExtents = false;
|
matthiasm@0
|
453 d1.isQuantized = false;
|
matthiasm@0
|
454 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
455 d1.hasDuration = false;
|
matthiasm@0
|
456 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
457 list.push_back(d1);
|
matthiasm@0
|
458
|
matthiasm@0
|
459 OutputDescriptor d2;
|
matthiasm@0
|
460 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
461 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
462 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
463 d2.unit = "";
|
matthiasm@0
|
464 d2.hasFixedBinCount = true;
|
matthiasm@0
|
465 d2.binCount = 256;
|
matthiasm@0
|
466 d2.hasKnownExtents = false;
|
matthiasm@0
|
467 d2.isQuantized = false;
|
matthiasm@0
|
468 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
469 d2.hasDuration = false;
|
matthiasm@0
|
470 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
471 list.push_back(d2);
|
matthiasm@0
|
472
|
matthiasm@0
|
473 OutputDescriptor d3;
|
matthiasm@0
|
474 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
475 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
476 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
477 d3.unit = "";
|
matthiasm@0
|
478 d3.hasFixedBinCount = true;
|
matthiasm@0
|
479 d3.binCount = 84;
|
matthiasm@0
|
480 d3.hasKnownExtents = false;
|
matthiasm@0
|
481 d3.isQuantized = false;
|
matthiasm@0
|
482 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
483 d3.hasDuration = false;
|
matthiasm@0
|
484 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
485 list.push_back(d3);
|
matthiasm@0
|
486
|
matthiasm@0
|
487 OutputDescriptor d4;
|
matthiasm@0
|
488 d4.identifier = "chroma";
|
matthiasm@0
|
489 d4.name = "Chromagram";
|
matthiasm@0
|
490 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
491 d4.unit = "";
|
matthiasm@0
|
492 d4.hasFixedBinCount = true;
|
matthiasm@0
|
493 d4.binCount = 12;
|
matthiasm@0
|
494 d4.binNames = chromanames;
|
matthiasm@0
|
495 d4.hasKnownExtents = false;
|
matthiasm@0
|
496 d4.isQuantized = false;
|
matthiasm@0
|
497 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
498 d4.hasDuration = false;
|
matthiasm@0
|
499 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
500 list.push_back(d4);
|
matthiasm@0
|
501
|
matthiasm@0
|
502 OutputDescriptor d5;
|
matthiasm@0
|
503 d5.identifier = "basschroma";
|
matthiasm@0
|
504 d5.name = "Bass Chromagram";
|
matthiasm@0
|
505 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
506 d5.unit = "";
|
matthiasm@0
|
507 d5.hasFixedBinCount = true;
|
matthiasm@0
|
508 d5.binCount = 12;
|
matthiasm@0
|
509 d5.binNames = chromanames;
|
matthiasm@0
|
510 d5.hasKnownExtents = false;
|
matthiasm@0
|
511 d5.isQuantized = false;
|
matthiasm@0
|
512 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
513 d5.hasDuration = false;
|
matthiasm@0
|
514 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
515 list.push_back(d5);
|
matthiasm@0
|
516
|
matthiasm@0
|
517 OutputDescriptor d6;
|
matthiasm@0
|
518 d6.identifier = "bothchroma";
|
matthiasm@0
|
519 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
520 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
521 d6.unit = "";
|
matthiasm@0
|
522 d6.hasFixedBinCount = true;
|
matthiasm@0
|
523 d6.binCount = 24;
|
matthiasm@0
|
524 d6.binNames = bothchromanames;
|
matthiasm@0
|
525 d6.hasKnownExtents = false;
|
matthiasm@0
|
526 d6.isQuantized = false;
|
matthiasm@0
|
527 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
528 d6.hasDuration = false;
|
matthiasm@0
|
529 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
530 list.push_back(d6);
|
matthiasm@0
|
531
|
matthiasm@0
|
532 OutputDescriptor d7;
|
matthiasm@0
|
533 d7.identifier = "simplechord";
|
matthiasm@0
|
534 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
535 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
536 d7.unit = "";
|
matthiasm@0
|
537 d7.hasFixedBinCount = true;
|
matthiasm@0
|
538 d7.binCount = 0;
|
matthiasm@0
|
539 d7.hasKnownExtents = false;
|
matthiasm@0
|
540 d7.isQuantized = false;
|
matthiasm@0
|
541 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
542 d7.hasDuration = false;
|
matthiasm@0
|
543 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
544 list.push_back(d7);
|
matthiasm@0
|
545
|
matthiasm@0
|
546 OutputDescriptor d8;
|
matthiasm@0
|
547 d8.identifier = "inconsistency";
|
matthiasm@0
|
548 d8.name = "Harmonic inconsistency value";
|
matthiasm@0
|
549 d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@0
|
550 d8.unit = "";
|
matthiasm@0
|
551 d8.hasFixedBinCount = true;
|
matthiasm@0
|
552 d8.binCount = 1;
|
matthiasm@0
|
553 d8.hasKnownExtents = false;
|
matthiasm@0
|
554 d8.isQuantized = false;
|
matthiasm@0
|
555 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
556 d8.hasDuration = false;
|
matthiasm@0
|
557 d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
558 list.push_back(d8);
|
matthiasm@0
|
559
|
matthiasm@0
|
560 OutputDescriptor d9;
|
matthiasm@0
|
561 d9.identifier = "inconsistencysegment";
|
matthiasm@0
|
562 d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@0
|
563 d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@0
|
564 d9.unit = "";
|
matthiasm@0
|
565 d9.hasFixedBinCount = true;
|
matthiasm@0
|
566 d9.binCount = 0;
|
matthiasm@0
|
567 d9.hasKnownExtents = true;
|
matthiasm@0
|
568 d9.minValue = 0.1;
|
matthiasm@0
|
569 d9.maxValue = 0.9;
|
matthiasm@0
|
570 d9.isQuantized = false;
|
matthiasm@0
|
571 d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
572 d9.hasDuration = false;
|
matthiasm@0
|
573 d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
574 list.push_back(d9);
|
matthiasm@0
|
575
|
matthiasm@0
|
576 OutputDescriptor d10;
|
matthiasm@0
|
577 d10.identifier = "localtuning";
|
matthiasm@0
|
578 d10.name = "Local tuning";
|
matthiasm@0
|
579 d10.description = "";
|
matthiasm@0
|
580 d10.unit = "Hz";
|
matthiasm@0
|
581 d10.hasFixedBinCount = true;
|
matthiasm@0
|
582 d10.binCount = 1;
|
matthiasm@0
|
583 d10.hasKnownExtents = true;
|
matthiasm@0
|
584 d10.minValue = 427.47;
|
matthiasm@0
|
585 d10.maxValue = 452.89;
|
matthiasm@0
|
586 d10.isQuantized = false;
|
matthiasm@0
|
587 d10.sampleType = OutputDescriptor::OneSamplePerStep;
|
matthiasm@0
|
588 d10.hasDuration = false;
|
matthiasm@0
|
589 d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
590 list.push_back(d10);
|
matthiasm@0
|
591
|
matthiasm@0
|
592 return list;
|
matthiasm@0
|
593 }
|
matthiasm@0
|
594
|
matthiasm@0
|
595
|
matthiasm@0
|
596 bool
|
matthiasm@0
|
597 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
598 {
|
matthiasm@0
|
599 if (debug_on) cerr << "--> initialise";
|
matthiasm@0
|
600 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
601 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
602 m_blockSize = blockSize;
|
matthiasm@0
|
603 m_stepSize = stepSize;
|
matthiasm@0
|
604 frameCount = 0;
|
matthiasm@0
|
605 int tempn = 256 * m_blockSize/2;
|
matthiasm@0
|
606 cerr << tempn;
|
matthiasm@0
|
607 float *tempkernel = new float[tempn];
|
matthiasm@0
|
608
|
matthiasm@0
|
609 // vector<float> m_kernelValue;
|
matthiasm@0
|
610 // vector<int> m_kernelFftIndex;
|
matthiasm@0
|
611 // vector<int> m_kernelNoteIndex;
|
matthiasm@0
|
612
|
matthiasm@0
|
613
|
matthiasm@0
|
614 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@0
|
615
|
matthiasm@0
|
616 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@0
|
617 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@0
|
618 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@0
|
619 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
620 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@0
|
621 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
622 }
|
matthiasm@0
|
623 }
|
matthiasm@0
|
624 }
|
matthiasm@0
|
625 delete tempkernel;
|
matthiasm@0
|
626 // int count = 0;
|
matthiasm@0
|
627 // for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
628 // cerr << m_kernelFftIndex[count] << " -- " << m_kernelNoteIndex[count] << " -- " << m_kernelValue[count] << endl;
|
matthiasm@0
|
629 // count++;
|
matthiasm@0
|
630 // }
|
matthiasm@0
|
631 return true;
|
matthiasm@0
|
632 }
|
matthiasm@0
|
633
|
matthiasm@0
|
634 void
|
matthiasm@0
|
635 NNLSChroma::reset()
|
matthiasm@0
|
636 {
|
matthiasm@0
|
637 if (debug_on) cerr << "--> reset";
|
matthiasm@0
|
638 // Clear buffers, reset stored values, etc
|
matthiasm@0
|
639 frameCount = 0;
|
matthiasm@0
|
640 m_dictID = 0;
|
matthiasm@0
|
641 }
|
matthiasm@0
|
642
|
matthiasm@0
|
643 NNLSChroma::FeatureSet
|
matthiasm@0
|
644 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
645 {
|
matthiasm@0
|
646 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
647 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix
|
matthiasm@0
|
648
|
matthiasm@0
|
649 frameCount++;
|
matthiasm@0
|
650 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
651
|
matthiasm@0
|
652 Feature f10; // local tuning
|
matthiasm@0
|
653
|
matthiasm@0
|
654 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
655
|
matthiasm@0
|
656 // make magnitude
|
matthiasm@0
|
657 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
658 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
659 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@0
|
660 }
|
matthiasm@0
|
661
|
matthiasm@0
|
662
|
matthiasm@0
|
663 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
664 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
665 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
666 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
667 }
|
matthiasm@0
|
668 int binCount = 0;
|
matthiasm@0
|
669 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
670 // cerr << ".";
|
matthiasm@0
|
671 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@0
|
672 binCount++;
|
matthiasm@0
|
673 }
|
matthiasm@0
|
674 cerr << nm[20];
|
matthiasm@0
|
675 cerr << endl;
|
matthiasm@0
|
676
|
matthiasm@0
|
677
|
matthiasm@0
|
678 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
679 // update means of complex tuning variables
|
matthiasm@0
|
680 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
681 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
682 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
683
|
matthiasm@0
|
684 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
685 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
686 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
687 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@0
|
688 m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0];
|
matthiasm@0
|
689 m_localTuning1 *= 0.99994; m_localTuning1 += nm[iTone + 1];
|
matthiasm@0
|
690 m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2];
|
matthiasm@0
|
691 }
|
matthiasm@0
|
692
|
matthiasm@0
|
693 // if (m_tuneLocal) {
|
matthiasm@0
|
694 // local tuning
|
matthiasm@0
|
695 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
696 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
697 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
698 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
699 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
700 f10.values.push_back(tuning440);
|
matthiasm@0
|
701 // }
|
matthiasm@0
|
702
|
matthiasm@0
|
703 Feature f1; // logfreqspec
|
matthiasm@0
|
704 f1.hasTimestamp = true;
|
matthiasm@0
|
705 f1.timestamp = timestamp;
|
matthiasm@0
|
706 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
707 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
708 }
|
matthiasm@0
|
709
|
matthiasm@0
|
710 FeatureSet fs;
|
matthiasm@0
|
711 fs[1].push_back(f1);
|
matthiasm@0
|
712 fs[10].push_back(f10);
|
matthiasm@0
|
713
|
matthiasm@0
|
714 // deletes
|
matthiasm@0
|
715 delete[] magnitude;
|
matthiasm@0
|
716 delete[] nm;
|
matthiasm@0
|
717
|
matthiasm@0
|
718 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@0
|
719 return fs;
|
matthiasm@0
|
720 }
|
matthiasm@0
|
721
|
matthiasm@0
|
// Called after the last process() call to collect any remaining features.
//
// In its current state this function only emits a debug trace (when debug_on
// is set) and returns an empty FeatureSet. Everything between the declaration
// of fsOut and the final return is a disabled pipeline kept for reference:
// global tuning estimate (output 0), tuned log-frequency spectrum (output 2),
// semitone spectrum and chromagrams (outputs 3-6), chord estimate (output 7)
// and musicity / speech-music segmentation (outputs 8-9).
//
// NOTE(review): points to check before re-enabling the disabled code:
//  - `hw`, passed to SpecialConvolution(), is not declared inside this
//    function; presumably it is meant to be built from `hammingwind` -- confirm.
//  - Several loops run to a literal 256 while indexing f2.values /
//    runningmean / runningstd; this assumes those hold at least 256 entries.
//  - The body of `if (some_b_greater_zero) { }` is empty, so chroma and
//    basschroma stay all-zero; sumchordvalue is then 0 and the salience
//    normalisation divides by zero. f3 never receives any values.
//  - In the musicity segmenter, `for (int i = startIndex; i <= endIndex; i++)`
//    reads musicityValue[endIndex], and endIndex can equal chordogram.size();
//    this looks like an off-by-one past the end -- confirm.
//  - `diffsum /= chromasum` has no guard for chromasum == 0.
NNLSChroma::FeatureSet
NNLSChroma::getRemainingFeatures()
{
    if (debug_on) cerr << "--> getRemainingFeatures" << endl;
    FeatureSet fsOut;
    //
    // /** Calculate Tuning
    // calculate tuning from (using the angle of the complex number defined by the
    // cumulative mean real and imag values)
    // **/
    // float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
    // float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
    // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
    // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
    // int intShift = floor(normalisedtuning * 3);
    // float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
    //
    // char buffer0 [50];
    //
    // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
    //
    // // cerr << "normalisedtuning: " << normalisedtuning << '\n';
    //
    // // push tuning to FeatureSet fsOut
    // Feature f0; // tuning
    // f0.hasTimestamp = true;
    // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
    // f0.label = buffer0;
    // fsOut[0].push_back(f0);
    //
    // /** Tune Log-Frequency Spectrogram
    // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
    // perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
    // **/
    //
    // float tempValue = 0;
    // float dbThreshold = 0; // relative to the background spectrum
    // float thresh = pow(10,dbThreshold/20);
    // // cerr << "tune local ? " << m_tuneLocal << endl;
    // int count = 0;
    //
    // for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
    //     Feature f1 = *i;
    //     Feature f2; // tuned log-frequency spectrum
    //     f2.hasTimestamp = true;
    //     f2.timestamp = f1.timestamp;
    //     f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
    //
    //     if (m_tuneLocal) {
    //         intShift = floor(m_localTuning[count] * 3);
    //         intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
    //     }
    //
    //     // cerr << intShift << " " << intFactor << endl;
    //
    //     for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
    //         tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
    //         f2.values.push_back(tempValue);
    //     }
    //
    //     f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
    //     vector<float> runningmean = SpecialConvolution(f2.values,hw);
    //     vector<float> runningstd;
    //     for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
    //         runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
    //     }
    //     runningstd = SpecialConvolution(runningstd,hw); // second step convolve
    //     for (int i = 0; i < 256; i++) {
    //         runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
    //         if (runningstd[i] > 0) {
    //             f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
    //                 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
    //         }
    //         if (f2.values[i] < 0) {
    //             cerr << "ERROR: negative value in logfreq spectrum" << endl;
    //         }
    //     }
    //     fsOut[2].push_back(f2);
    //     count++;
    // }
    //
    // /** Semitone spectrum and chromagrams
    // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
    // is inferred using a non-negative least squares algorithm.
    // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
    // bass and treble stacked onto each other).
    // **/
    // // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
    //
    // vector<vector<float> > chordogram;
    // vector<float> oldchroma = vector<float>(12,0);
    // vector<float> oldbasschroma = vector<float>(12,0);
    // count = 0;
    //
    // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
    //     Feature f2 = *it; // logfreq spectrum
    //     Feature f3; // semitone spectrum
    //     Feature f4; // treble chromagram
    //     Feature f5; // bass chromagram
    //     Feature f6; // treble and bass chromagram
    //
    //     f3.hasTimestamp = true;
    //     f3.timestamp = f2.timestamp;
    //
    //     f4.hasTimestamp = true;
    //     f4.timestamp = f2.timestamp;
    //
    //     f5.hasTimestamp = true;
    //     f5.timestamp = f2.timestamp;
    //
    //     f6.hasTimestamp = true;
    //     f6.timestamp = f2.timestamp;
    //
    //     float b[256];
    //
    //     bool some_b_greater_zero = false;
    //     for (int i = 0; i < 256; i++) {
    //         b[i] = f2.values[i];
    //         if (b[i] > 0) {
    //             some_b_greater_zero = true;
    //         }
    //     }
    //
    //     // here's where the non-negative least squares algorithm calculates the note activation x
    //
    //     vector<float> chroma = vector<float>(12, 0);
    //     vector<float> basschroma = vector<float>(12, 0);
    //     if (some_b_greater_zero) {
    //     }
    //
    //     f4.values = chroma;
    //     f5.values = basschroma;
    //     chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
    //     f6.values = chroma;
    //
    //     // local chord estimation
    //     vector<float> currentChordSalience;
    //     float tempchordvalue = 0;
    //     float sumchordvalue = 0;
    //     int nChord = nChorddict / 24;
    //     for (int iChord = 0; iChord < nChord; iChord++) {
    //         tempchordvalue = 0;
    //         for (int iBin = 0; iBin < 12; iBin++) {
    //             tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
    //         }
    //         for (int iBin = 12; iBin < 24; iBin++) {
    //             tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
    //         }
    //         sumchordvalue+=tempchordvalue;
    //         currentChordSalience.push_back(tempchordvalue);
    //     }
    //     for (int iChord = 0; iChord < nChord; iChord++) {
    //         currentChordSalience[iChord] /= sumchordvalue;
    //     }
    //     chordogram.push_back(currentChordSalience);
    //
    //     fsOut[3].push_back(f3);
    //     fsOut[4].push_back(f4);
    //     fsOut[5].push_back(f5);
    //     fsOut[6].push_back(f6);
    //     // if (x) free(x);
    //     // delete[] b;
    //     count++;
    // }
    // // cerr << m_stepSize << endl<< endl;
    // count = 0;
    // int kernelwidth = (49 * 2048) / m_stepSize;
    // int nChord = nChorddict / 24;
    // int musicitykernelwidth = (50 * 2048) / m_stepSize;
    //
    // /* Simple chord estimation
    // I just take the local chord estimates ("currentChordSalience") and average them over time, then
    // take the maximum. Very simple, don't do this at home...
    // */
    // vector<int> chordSequence;
    // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
    //
    //     int startIndex = max(count - kernelwidth/2 + 1,0);
    //     int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1);
    //     vector<float> temp = vector<float>(nChord,0);
    //     for (int iChord = 0; iChord < nChord; iChord++) {
    //         float val = 0;
    //         for (int i = startIndex; i < endIndex; i++) {
    //             val += chordogram[i][iChord] *
    //                 (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weighted sum (triangular window)
    //         }
    //         temp[iChord] = val; // sum
    //     }
    //
    //     // get maximum for "chord estimate"
    //
    //     float bestChordValue = 0;
    //     int bestChordIndex = nChord-1; // "no chord" is default
    //     for (int iChord = 0; iChord < nChord; iChord++) {
    //         if (temp[iChord] > bestChordValue) {
    //             bestChordValue = temp[iChord];
    //             bestChordIndex = iChord;
    //         }
    //     }
    //     // cerr << bestChordIndex << endl;
    //     chordSequence.push_back(bestChordIndex);
    //     count++;
    // }
    // // mode filter on chordSequence
    // count = 0;
    // int oldChordIndex = -1;
    // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
    //     Feature f6 = *it;
    //     Feature f7; // chord estimate
    //
    //     f7.hasTimestamp = true;
    //     f7.timestamp = f6.timestamp;
    //
    //     vector<int> chordCount = vector<int>(121,0);
    //
    //     int maxChordCount = 0;
    //     int maxChordIndex = 120;
    //     int startIndex = max(count - kernelwidth/2,0);
    //     int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1);
    //     for (int i = startIndex; i < endIndex; i++) {
    //         chordCount[chordSequence[i]]++;
    //         if (chordCount[chordSequence[i]] > maxChordCount) {
    //             maxChordCount++;
    //             maxChordIndex = chordSequence[i];
    //         }
    //     }
    //     if (oldChordIndex != maxChordIndex) {
    //         oldChordIndex = maxChordIndex;
    //
    //         char buffer1 [50];
    //         if (maxChordIndex < nChord - 1) {
    //             sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
    //         } else {
    //             sprintf(buffer1, "N");
    //         }
    //         f7.label = buffer1;
    //         fsOut[7].push_back(f7);
    //     }
    //     count++;
    // }
    // // musicity
    // count = 0;
    // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
    // vector<float> musicityValue;
    // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
    //     Feature f4 = *it;
    //
    //     int startIndex = max(count - musicitykernelwidth/2,0);
    //     int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
    //     float chromasum = 0;
    //     float diffsum = 0;
    //     for (int k = 0; k < 12; k++) {
    //         for (int i = startIndex + 1; i < endIndex; i++) {
    //             chromasum += pow(fsOut[4][i].values[k],2);
    //             diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
    //         }
    //     }
    //     diffsum /= chromasum;
    //     musicityValue.push_back(diffsum);
    //     count++;
    // }
    //
    // float musicityThreshold = 0.44;
    // if (m_stepSize == 4096) {
    //     musicityThreshold = 0.74;
    // }
    // if (m_stepSize == 4410) {
    //     musicityThreshold = 0.77;
    // }
    //
    // count = 0;
    // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
    //     Feature f4 = *it;
    //     Feature f8; // musicity
    //     Feature f9; // musicity segmenter
    //
    //     f8.hasTimestamp = true;
    //     f8.timestamp = f4.timestamp;
    //     f9.hasTimestamp = true;
    //     f9.timestamp = f4.timestamp;
    //
    //     int startIndex = max(count - musicitykernelwidth/2,0);
    //     int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
    //     int musicityCount = 0;
    //     for (int i = startIndex; i <= endIndex; i++) {
    //         if (musicityValue[i] > musicityThreshold) musicityCount++;
    //     }
    //     bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
    //
    //     if (isSpeech) {
    //         if (oldlabeltype != 2) {
    //             f9.label = "Speech";
    //             fsOut[9].push_back(f9);
    //             oldlabeltype = 2;
    //         }
    //     } else {
    //         if (oldlabeltype != 1) {
    //             f9.label = "Music";
    //             fsOut[9].push_back(f9);
    //             oldlabeltype = 1;
    //         }
    //     }
    //     f8.values.push_back(musicityValue[count]);
    //     fsOut[8].push_back(f8);
    //     count++;
    // }
    return fsOut;

}
|
matthiasm@0
|
1031
|