matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@0
|
6 #include <sstream>
|
matthiasm@0
|
7 #include <cassert>
|
matthiasm@0
|
8 #include <cstdio>
|
matthiasm@1
|
9 #include "nnls.h"
|
matthiasm@0
|
10 // #include "cblas.h"
|
matthiasm@0
|
11 #include "chorddict.cpp"
|
matthiasm@0
|
12 using namespace std;
|
matthiasm@0
|
13
|
matthiasm@0
|
// Numerically sin(120 deg) and cos(120 deg); their use is not visible in this part of the file.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// Symmetric 19-tap window (Hamming-shaped, going by the name); the coefficients sum to roughly 1.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// 84-entry window that is non-zero only in its first 37 entries.
// The remaining 47 entries are 0 and are left to zero-initialisation.
const float basswindow[84] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341,
    0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311,
    0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393,
    0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498,
    0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612,
    0.004909
};

// 84-entry symmetric window: rises over the first 42 entries, falls over the last 42.
const float treblewindow[84] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719,
    0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856,
    0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590,
    0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956,
    0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058,
    0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650,
    0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281,
    0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
    0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410,
    0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044,
    0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
    0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942,
    0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Bin labels: entries 0..11 are the bass-chroma names, entries 12..23 the plain note names.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// The 19-tap window above, wrapped as a vector.
const std::vector<float> hw(hammingwind, hammingwind + 19);

// Number of log-frequency bins used throughout this file.
const int nNote = 256;
|
matthiasm@0
|
23
|
matthiasm@0
|
// Compile-time switch for the "--> function" trace messages written to cerr.
const bool debug_on = false;

/** Special Convolution
 *
 * The result is as long as the convolvee, i.e. the first argument. In the
 * valid core part of the convolution it contains the usual convolution
 * values; the pads at the beginning (end) are filled with the value of the
 * first (last) valid convolution bin.
 *
 * @param convolvee  signal to be smoothed
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() values. If the convolvee is shorter
 *         than the kernel (or the kernel is empty) there is no valid bin
 *         and the result is all zeros.
 **/
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Sized from the input rather than a fixed 256, so longer inputs cannot
    // overrun the buffer and the result length matches the contract above.
    std::vector<float> Z(convolvee.size(), 0.0f);

    // Without at least one full kernel overlap there is nothing to compute
    // (and the padding below would index out of range).
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: every position where the kernel fully overlaps the input
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; ++n) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
59
|
matthiasm@0
|
60 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
61 // {
|
matthiasm@0
|
62 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
63 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
64 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
65 // }
|
matthiasm@0
|
66 // return freq;
|
matthiasm@0
|
67 // }
|
matthiasm@0
|
68
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * Returns 0.5 + 0.5 * cos(2*pi*(x - centre)/width) when x lies within half a
 * width of centre (so the peak value 1 is at x == centre and the pulse falls
 * to 0 at centre +/- width/2), and 0 everywhere else.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the peak
 * @param width   full width of the pulse; a non-positive width yields 0
 */
float cospuls(float x, float centre, float width)
{
    // A zero width would divide by zero and give NaN at x == centre;
    // a negative width never satisfied the range test below anyway.
    if (!(width > 0.0f)) {
        return 0.0;
    }

    // Spelled out because M_PI is not guaranteed by the C++ standard.
    const double pi = 3.14159265358979323846;

    float recipwidth = 1.0/width;
    // std::fabs keeps the comparison in floating point; an unqualified abs()
    // can resolve to the integer overload and truncate the distance.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*pi*recipwidth)*.5+.5;
    }
    return 0.0;
}
|
matthiasm@0
|
77
|
matthiasm@0
|
78 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
79 {
|
matthiasm@0
|
80 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
81 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
82 // now scale to correct for note density
|
matthiasm@0
|
83 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
84 if (x > 0) {
|
matthiasm@0
|
85 out = out / (c * x);
|
matthiasm@0
|
86 } else {
|
matthiasm@0
|
87 out = 0;
|
matthiasm@0
|
88 }
|
matthiasm@0
|
89 return out;
|
matthiasm@0
|
90 }
|
matthiasm@0
|
91
|
matthiasm@0
|
92 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
93
|
matthiasm@0
|
94 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
95 int minoctave = 0; // this must be 0
|
matthiasm@0
|
96 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
97 int oversampling = 80;
|
matthiasm@0
|
98
|
matthiasm@0
|
99 // linear frequency vector
|
matthiasm@0
|
100 vector<float> fft_f;
|
matthiasm@0
|
101 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
102 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
103 }
|
matthiasm@0
|
104 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
105
|
matthiasm@0
|
106 // linear oversampled frequency vector
|
matthiasm@0
|
107 vector<float> oversampled_f;
|
matthiasm@0
|
108 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
109 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 // pitch-spaced frequency vector
|
matthiasm@0
|
113 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
114 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
115 vector<float> cq_f;
|
matthiasm@0
|
116 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
117 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
119 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
120 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
121 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
122 }
|
matthiasm@0
|
123 }
|
matthiasm@0
|
124 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
125 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
126
|
matthiasm@0
|
127 int nFFT = fft_f.size();
|
matthiasm@0
|
128
|
matthiasm@0
|
129 vector<float> fft_activation;
|
matthiasm@0
|
130 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
131 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
132 fft_activation.push_back(cosp);
|
matthiasm@0
|
133 // cerr << cosp << endl;
|
matthiasm@0
|
134 }
|
matthiasm@0
|
135
|
matthiasm@0
|
136 float cq_activation;
|
matthiasm@0
|
137 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
138 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
139 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
140 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
141 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
142 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
143 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
144 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
145 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
146 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
147 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
148 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
149 }
|
matthiasm@0
|
150 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
151 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
152 // }
|
matthiasm@0
|
153 }
|
matthiasm@0
|
154 }
|
matthiasm@0
|
155 }
|
matthiasm@0
|
156 return true;
|
matthiasm@0
|
157 }
|
matthiasm@0
|
158
|
matthiasm@1
|
159 bool dictionaryMatrix(double* dm) {
|
matthiasm@1
|
160 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
161 int minoctave = 0; // this must be 0
|
matthiasm@1
|
162 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
163 float s_param = 0.6;
|
matthiasm@1
|
164
|
matthiasm@1
|
165 // pitch-spaced frequency vector
|
matthiasm@1
|
166 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
167 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
168 vector<float> cq_f;
|
matthiasm@1
|
169 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
170 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
172 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
173 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
174 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
175 }
|
matthiasm@1
|
176 }
|
matthiasm@1
|
177 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
178 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
179
|
matthiasm@1
|
180 // make out frequency vector
|
matthiasm@1
|
181 vector<float> out_f;
|
matthiasm@1
|
182
|
matthiasm@1
|
183 float curr_f;
|
matthiasm@1
|
184 float floatbin;
|
matthiasm@1
|
185 float curr_amp;
|
matthiasm@1
|
186 // now for every combination calculate the matrix element
|
matthiasm@1
|
187 unsigned countElement = 0;
|
matthiasm@1
|
188 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@1
|
189 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
190 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@1
|
191 if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@1
|
192 floatbin = (iOut * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@1
|
193 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@1
|
194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@1
|
195 // cerr << dm[countElement] << endl;
|
matthiasm@1
|
196 dm[countElement] = cospuls(iNote+1.0, floatbin, binspersemitone + 0.0);
|
matthiasm@1
|
197 countElement++;
|
matthiasm@1
|
198 }
|
matthiasm@1
|
199 }
|
matthiasm@1
|
200 }
|
matthiasm@1
|
201 }
|
matthiasm@1
|
202
|
matthiasm@0
|
203
|
matthiasm@0
|
204 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
205 Plugin(inputSampleRate),
|
matthiasm@0
|
206 m_fl(0),
|
matthiasm@0
|
207 m_blockSize(0),
|
matthiasm@0
|
208 m_stepSize(0),
|
matthiasm@0
|
209 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
210 m_meanTuning0(0),
|
matthiasm@0
|
211 m_meanTuning1(0),
|
matthiasm@0
|
212 m_meanTuning2(0),
|
matthiasm@0
|
213 m_localTuning0(0),
|
matthiasm@0
|
214 m_localTuning1(0),
|
matthiasm@0
|
215 m_localTuning2(0),
|
matthiasm@0
|
216 m_paling(0),
|
matthiasm@0
|
217 m_localTuning(0),
|
matthiasm@0
|
218 m_kernelValue(0),
|
matthiasm@0
|
219 m_kernelFftIndex(0),
|
matthiasm@0
|
220 m_kernelNoteIndex(0),
|
matthiasm@1
|
221 m_dict(0),
|
matthiasm@0
|
222 m_tuneLocal(false),
|
matthiasm@0
|
223 m_dictID(0)
|
matthiasm@0
|
224 {
|
matthiasm@0
|
225 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@1
|
226 m_dict = new double[nNote * 84];
|
matthiasm@1
|
227 dictionaryMatrix(m_dict);
|
matthiasm@0
|
228 }
|
matthiasm@0
|
229
|
matthiasm@0
|
230
|
matthiasm@0
|
231 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
232 {
|
matthiasm@0
|
233 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
234 delete [] m_dict;
|
matthiasm@0
|
235 }
|
matthiasm@0
|
236
|
matthiasm@0
|
237 string
|
matthiasm@0
|
238 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
239 {
|
matthiasm@0
|
240 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
241 return "nnls_chroma";
|
matthiasm@0
|
242 }
|
matthiasm@0
|
243
|
matthiasm@0
|
244 string
|
matthiasm@0
|
245 NNLSChroma::getName() const
|
matthiasm@0
|
246 {
|
matthiasm@0
|
247 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
248 return "NNLS Chroma";
|
matthiasm@0
|
249 }
|
matthiasm@0
|
250
|
matthiasm@0
|
251 string
|
matthiasm@0
|
252 NNLSChroma::getDescription() const
|
matthiasm@0
|
253 {
|
matthiasm@0
|
254 // Return something helpful here!
|
matthiasm@0
|
255 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@0
|
256 return "";
|
matthiasm@0
|
257 }
|
matthiasm@0
|
258
|
matthiasm@0
|
259 string
|
matthiasm@0
|
260 NNLSChroma::getMaker() const
|
matthiasm@0
|
261 {
|
matthiasm@0
|
262 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
263 // Your name here
|
matthiasm@0
|
264 return "Matthias Mauch";
|
matthiasm@0
|
265 }
|
matthiasm@0
|
266
|
matthiasm@0
|
267 int
|
matthiasm@0
|
268 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
269 {
|
matthiasm@0
|
270 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
271 // Increment this each time you release a version that behaves
|
matthiasm@0
|
272 // differently from the previous one
|
matthiasm@0
|
273 return 1;
|
matthiasm@0
|
274 }
|
matthiasm@0
|
275
|
matthiasm@0
|
276 string
|
matthiasm@0
|
277 NNLSChroma::getCopyright() const
|
matthiasm@0
|
278 {
|
matthiasm@0
|
279 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
280 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
281 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
282 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
283 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
284 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
285 }
|
matthiasm@0
|
286
|
matthiasm@0
|
287 NNLSChroma::InputDomain
|
matthiasm@0
|
288 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
289 {
|
matthiasm@0
|
290 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
291 return FrequencyDomain;
|
matthiasm@0
|
292 }
|
matthiasm@0
|
293
|
matthiasm@0
|
294 size_t
|
matthiasm@0
|
295 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
296 {
|
matthiasm@0
|
297 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
298 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
299 }
|
matthiasm@0
|
300
|
matthiasm@0
|
301 size_t
|
matthiasm@0
|
302 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
303 {
|
matthiasm@0
|
304 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
305 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
306 // means the same as the block size for TimeDomain
|
matthiasm@0
|
307 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
308 }
|
matthiasm@0
|
309
|
matthiasm@0
|
310 size_t
|
matthiasm@0
|
311 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
312 {
|
matthiasm@0
|
313 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
314 return 1;
|
matthiasm@0
|
315 }
|
matthiasm@0
|
316
|
matthiasm@0
|
317 size_t
|
matthiasm@0
|
318 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
319 {
|
matthiasm@0
|
320 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
321 return 1;
|
matthiasm@0
|
322 }
|
matthiasm@0
|
323
|
matthiasm@0
|
324 NNLSChroma::ParameterList
|
matthiasm@0
|
325 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
326 {
|
matthiasm@0
|
327 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
328 ParameterList list;
|
matthiasm@0
|
329
|
matthiasm@0
|
330 ParameterDescriptor d0;
|
matthiasm@0
|
331 d0.identifier = "notedict";
|
matthiasm@0
|
332 d0.name = "note dictionary";
|
matthiasm@0
|
333 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@0
|
334 d0.unit = "";
|
matthiasm@0
|
335 d0.minValue = 0;
|
matthiasm@1
|
336 d0.maxValue = 1;
|
matthiasm@0
|
337 d0.defaultValue = 0;
|
matthiasm@0
|
338 d0.isQuantized = true;
|
matthiasm@0
|
339 d0.valueNames.push_back("s = 0.6");
|
matthiasm@1
|
340 // d0.valueNames.push_back("s = 0.9");
|
matthiasm@1
|
341 // d0.valueNames.push_back("s linearly spaced");
|
matthiasm@0
|
342 d0.valueNames.push_back("no NNLS");
|
matthiasm@0
|
343 d0.quantizeStep = 1.0;
|
matthiasm@0
|
344 list.push_back(d0);
|
matthiasm@0
|
345
|
matthiasm@0
|
346 ParameterDescriptor d1;
|
matthiasm@0
|
347 d1.identifier = "tuningmode";
|
matthiasm@0
|
348 d1.name = "tuning mode";
|
matthiasm@0
|
349 d1.description = "Tuning can be performed locally or on the whole extraction area.";
|
matthiasm@0
|
350 d1.unit = "";
|
matthiasm@0
|
351 d1.minValue = 0;
|
matthiasm@0
|
352 d1.maxValue = 1;
|
matthiasm@0
|
353 d1.defaultValue = 1;
|
matthiasm@0
|
354 d1.isQuantized = true;
|
matthiasm@0
|
355 d1.valueNames.push_back("global tuning");
|
matthiasm@0
|
356 d1.valueNames.push_back("local tuning");
|
matthiasm@0
|
357 d1.quantizeStep = 1.0;
|
matthiasm@0
|
358 list.push_back(d1);
|
matthiasm@0
|
359
|
matthiasm@0
|
360 ParameterDescriptor d2;
|
matthiasm@0
|
361 d2.identifier = "paling";
|
matthiasm@0
|
362 d2.name = "spectral paling";
|
matthiasm@0
|
363 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@0
|
364 d2.unit = "";
|
matthiasm@0
|
365 d2.minValue = 0;
|
matthiasm@0
|
366 d2.maxValue = 1;
|
matthiasm@0
|
367 d2.defaultValue = 0.5;
|
matthiasm@0
|
368 d2.isQuantized = false;
|
matthiasm@0
|
369 // d1.valueNames.push_back("global tuning");
|
matthiasm@0
|
370 // d1.valueNames.push_back("local tuning");
|
matthiasm@0
|
371 // d1.quantizeStep = 0.1;
|
matthiasm@0
|
372 list.push_back(d2);
|
matthiasm@0
|
373
|
matthiasm@0
|
374 return list;
|
matthiasm@0
|
375 }
|
matthiasm@0
|
376
|
matthiasm@0
|
377 float
|
matthiasm@0
|
378 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
379 {
|
matthiasm@0
|
380 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
381 if (identifier == "notedict") {
|
matthiasm@0
|
382 return m_dictID;
|
matthiasm@0
|
383 }
|
matthiasm@0
|
384
|
matthiasm@0
|
385 if (identifier == "paling") {
|
matthiasm@0
|
386 return m_paling;
|
matthiasm@0
|
387 }
|
matthiasm@0
|
388
|
matthiasm@0
|
389 if (identifier == "tuningmode") {
|
matthiasm@0
|
390 if (m_tuneLocal) {
|
matthiasm@0
|
391 return 1.0;
|
matthiasm@0
|
392 } else {
|
matthiasm@0
|
393 return 0.0;
|
matthiasm@0
|
394 }
|
matthiasm@0
|
395 }
|
matthiasm@0
|
396
|
matthiasm@0
|
397 return 0;
|
matthiasm@0
|
398
|
matthiasm@0
|
399 }
|
matthiasm@0
|
400
|
matthiasm@0
|
401 void
|
matthiasm@0
|
402 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
403 {
|
matthiasm@0
|
404 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
405 if (identifier == "notedict") {
|
matthiasm@0
|
406 m_dictID = (int) value;
|
matthiasm@0
|
407 }
|
matthiasm@0
|
408
|
matthiasm@0
|
409 if (identifier == "paling") {
|
matthiasm@0
|
410 m_paling = value;
|
matthiasm@0
|
411 }
|
matthiasm@0
|
412
|
matthiasm@0
|
413 if (identifier == "tuningmode") {
|
matthiasm@0
|
414 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
415 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
416 }
|
matthiasm@0
|
417 }
|
matthiasm@0
|
418
|
matthiasm@0
|
419 NNLSChroma::ProgramList
|
matthiasm@0
|
420 NNLSChroma::getPrograms() const
|
matthiasm@0
|
421 {
|
matthiasm@0
|
422 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
423 ProgramList list;
|
matthiasm@0
|
424
|
matthiasm@0
|
425 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
426 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
427
|
matthiasm@0
|
428 return list;
|
matthiasm@0
|
429 }
|
matthiasm@0
|
430
|
matthiasm@0
|
431 string
|
matthiasm@0
|
432 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
433 {
|
matthiasm@0
|
434 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
435 return ""; // no programs
|
matthiasm@0
|
436 }
|
matthiasm@0
|
437
|
matthiasm@0
|
438 void
|
matthiasm@0
|
439 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
440 {
|
matthiasm@0
|
441 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
442 }
|
matthiasm@0
|
443
|
matthiasm@0
|
444
|
matthiasm@0
|
445 NNLSChroma::OutputList
|
matthiasm@0
|
446 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
447 {
|
matthiasm@0
|
448 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
449 OutputList list;
|
matthiasm@0
|
450
|
matthiasm@0
|
451 // Make chroma names for the binNames property
|
matthiasm@0
|
452 vector<string> chromanames;
|
matthiasm@0
|
453 vector<string> bothchromanames;
|
matthiasm@0
|
454 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
455 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
456 if (iNote < 12) {
|
matthiasm@0
|
457 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
458 }
|
matthiasm@0
|
459 }
|
matthiasm@0
|
460
|
matthiasm@1
|
461 // int nNote = 84;
|
matthiasm@0
|
462
|
matthiasm@0
|
463 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
464 // Every plugin must have at least one output.
|
matthiasm@0
|
465
|
matthiasm@0
|
466 OutputDescriptor d0;
|
matthiasm@0
|
467 d0.identifier = "tuning";
|
matthiasm@0
|
468 d0.name = "Tuning";
|
matthiasm@0
|
469 d0.description = "The concert pitch.";
|
matthiasm@0
|
470 d0.unit = "Hz";
|
matthiasm@0
|
471 d0.hasFixedBinCount = true;
|
matthiasm@0
|
472 d0.binCount = 0;
|
matthiasm@0
|
473 d0.hasKnownExtents = true;
|
matthiasm@0
|
474 d0.minValue = 427.47;
|
matthiasm@0
|
475 d0.maxValue = 452.89;
|
matthiasm@0
|
476 d0.isQuantized = false;
|
matthiasm@0
|
477 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
478 d0.hasDuration = false;
|
matthiasm@0
|
479 list.push_back(d0);
|
matthiasm@0
|
480
|
matthiasm@0
|
481 OutputDescriptor d1;
|
matthiasm@0
|
482 d1.identifier = "logfreqspec";
|
matthiasm@0
|
483 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
484 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
485 d1.unit = "";
|
matthiasm@0
|
486 d1.hasFixedBinCount = true;
|
matthiasm@0
|
487 d1.binCount = nNote;
|
matthiasm@0
|
488 d1.hasKnownExtents = false;
|
matthiasm@0
|
489 d1.isQuantized = false;
|
matthiasm@0
|
490 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
491 d1.hasDuration = false;
|
matthiasm@0
|
492 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
493 list.push_back(d1);
|
matthiasm@0
|
494
|
matthiasm@0
|
495 OutputDescriptor d2;
|
matthiasm@0
|
496 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
497 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
498 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
499 d2.unit = "";
|
matthiasm@0
|
500 d2.hasFixedBinCount = true;
|
matthiasm@0
|
501 d2.binCount = 256;
|
matthiasm@0
|
502 d2.hasKnownExtents = false;
|
matthiasm@0
|
503 d2.isQuantized = false;
|
matthiasm@0
|
504 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
505 d2.hasDuration = false;
|
matthiasm@0
|
506 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
507 list.push_back(d2);
|
matthiasm@0
|
508
|
matthiasm@0
|
509 OutputDescriptor d3;
|
matthiasm@0
|
510 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
511 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
512 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
513 d3.unit = "";
|
matthiasm@0
|
514 d3.hasFixedBinCount = true;
|
matthiasm@0
|
515 d3.binCount = 84;
|
matthiasm@0
|
516 d3.hasKnownExtents = false;
|
matthiasm@0
|
517 d3.isQuantized = false;
|
matthiasm@0
|
518 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
519 d3.hasDuration = false;
|
matthiasm@0
|
520 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
521 list.push_back(d3);
|
matthiasm@0
|
522
|
matthiasm@0
|
523 OutputDescriptor d4;
|
matthiasm@0
|
524 d4.identifier = "chroma";
|
matthiasm@0
|
525 d4.name = "Chromagram";
|
matthiasm@0
|
526 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
527 d4.unit = "";
|
matthiasm@0
|
528 d4.hasFixedBinCount = true;
|
matthiasm@0
|
529 d4.binCount = 12;
|
matthiasm@0
|
530 d4.binNames = chromanames;
|
matthiasm@0
|
531 d4.hasKnownExtents = false;
|
matthiasm@0
|
532 d4.isQuantized = false;
|
matthiasm@0
|
533 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
534 d4.hasDuration = false;
|
matthiasm@0
|
535 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
536 list.push_back(d4);
|
matthiasm@0
|
537
|
matthiasm@0
|
538 OutputDescriptor d5;
|
matthiasm@0
|
539 d5.identifier = "basschroma";
|
matthiasm@0
|
540 d5.name = "Bass Chromagram";
|
matthiasm@0
|
541 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
542 d5.unit = "";
|
matthiasm@0
|
543 d5.hasFixedBinCount = true;
|
matthiasm@0
|
544 d5.binCount = 12;
|
matthiasm@0
|
545 d5.binNames = chromanames;
|
matthiasm@0
|
546 d5.hasKnownExtents = false;
|
matthiasm@0
|
547 d5.isQuantized = false;
|
matthiasm@0
|
548 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
549 d5.hasDuration = false;
|
matthiasm@0
|
550 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
551 list.push_back(d5);
|
matthiasm@0
|
552
|
matthiasm@0
|
553 OutputDescriptor d6;
|
matthiasm@0
|
554 d6.identifier = "bothchroma";
|
matthiasm@0
|
555 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
556 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
557 d6.unit = "";
|
matthiasm@0
|
558 d6.hasFixedBinCount = true;
|
matthiasm@0
|
559 d6.binCount = 24;
|
matthiasm@0
|
560 d6.binNames = bothchromanames;
|
matthiasm@0
|
561 d6.hasKnownExtents = false;
|
matthiasm@0
|
562 d6.isQuantized = false;
|
matthiasm@0
|
563 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
564 d6.hasDuration = false;
|
matthiasm@0
|
565 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
566 list.push_back(d6);
|
matthiasm@0
|
567
|
matthiasm@0
|
568 OutputDescriptor d7;
|
matthiasm@0
|
569 d7.identifier = "simplechord";
|
matthiasm@0
|
570 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
571 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
572 d7.unit = "";
|
matthiasm@0
|
573 d7.hasFixedBinCount = true;
|
matthiasm@0
|
574 d7.binCount = 0;
|
matthiasm@0
|
575 d7.hasKnownExtents = false;
|
matthiasm@0
|
576 d7.isQuantized = false;
|
matthiasm@0
|
577 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
578 d7.hasDuration = false;
|
matthiasm@0
|
579 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
580 list.push_back(d7);
|
matthiasm@0
|
581
|
matthiasm@1
|
582 // OutputDescriptor d8;
|
matthiasm@1
|
583 // d8.identifier = "inconsistency";
|
matthiasm@1
|
584 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
585 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
586 // d8.unit = "";
|
matthiasm@1
|
587 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
588 // d8.binCount = 1;
|
matthiasm@1
|
589 // d8.hasKnownExtents = false;
|
matthiasm@1
|
590 // d8.isQuantized = false;
|
matthiasm@1
|
591 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
592 // d8.hasDuration = false;
|
matthiasm@1
|
593 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
594 // list.push_back(d8);
|
matthiasm@1
|
595 //
|
matthiasm@1
|
596 // OutputDescriptor d9;
|
matthiasm@1
|
597 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
598 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
599 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
600 // d9.unit = "";
|
matthiasm@1
|
601 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
602 // d9.binCount = 0;
|
matthiasm@1
|
603 // d9.hasKnownExtents = true;
|
matthiasm@1
|
604 // d9.minValue = 0.1;
|
matthiasm@1
|
605 // d9.maxValue = 0.9;
|
matthiasm@1
|
606 // d9.isQuantized = false;
|
matthiasm@1
|
607 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
608 // d9.hasDuration = false;
|
matthiasm@1
|
609 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
610 // list.push_back(d9);
|
matthiasm@1
|
611 //
|
matthiasm@1
|
612 OutputDescriptor d10;
|
matthiasm@1
|
613 d10.identifier = "localtuning";
|
matthiasm@1
|
614 d10.name = "Local tuning";
|
matthiasm@1
|
615 d10.description = "";
|
matthiasm@1
|
616 d10.unit = "Hz";
|
matthiasm@1
|
617 d10.hasFixedBinCount = true;
|
matthiasm@1
|
618 d10.binCount = 1;
|
matthiasm@1
|
619 d10.hasKnownExtents = true;
|
matthiasm@1
|
620 d10.minValue = 427.47;
|
matthiasm@1
|
621 d10.maxValue = 452.89;
|
matthiasm@1
|
622 d10.isQuantized = false;
|
matthiasm@1
|
623 d10.sampleType = OutputDescriptor::OneSamplePerStep;
|
matthiasm@1
|
624 d10.hasDuration = false;
|
matthiasm@1
|
625 d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
626 list.push_back(d10);
|
matthiasm@1
|
627
|
matthiasm@0
|
628 return list;
|
matthiasm@0
|
629 }
|
matthiasm@0
|
630
|
matthiasm@0
|
631
|
matthiasm@0
|
632 bool
|
matthiasm@0
|
633 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
634 {
|
matthiasm@1
|
635 if (debug_on) {
|
matthiasm@1
|
636 cerr << "--> initialise";
|
matthiasm@1
|
637 }
|
matthiasm@1
|
638
|
matthiasm@0
|
639 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
640 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
641 m_blockSize = blockSize;
|
matthiasm@0
|
642 m_stepSize = stepSize;
|
matthiasm@0
|
643 frameCount = 0;
|
matthiasm@0
|
644 int tempn = 256 * m_blockSize/2;
|
matthiasm@1
|
645 cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
646 float *tempkernel;
|
matthiasm@1
|
647
|
matthiasm@1
|
648 tempkernel = new float[tempn];
|
matthiasm@1
|
649
|
matthiasm@0
|
650 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
651 m_kernelValue.clear();
|
matthiasm@1
|
652 m_kernelFftIndex.clear();
|
matthiasm@1
|
653 m_kernelNoteIndex.clear();
|
matthiasm@1
|
654 int countNonzero = 0;
|
matthiasm@0
|
655 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
656 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
657 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
658 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
659 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
660 countNonzero++;
|
matthiasm@0
|
661 }
|
matthiasm@1
|
662 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
663 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
664 }
|
matthiasm@0
|
665 }
|
matthiasm@1
|
666 }
|
matthiasm@1
|
667 cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
668 delete [] tempkernel;
|
matthiasm@1
|
669
|
matthiasm@1
|
670
|
matthiasm@0
|
671 return true;
|
matthiasm@0
|
672 }
|
matthiasm@0
|
673
|
matthiasm@0
|
674 void
|
matthiasm@0
|
675 NNLSChroma::reset()
|
matthiasm@0
|
676 {
|
matthiasm@0
|
677 if (debug_on) cerr << "--> reset";
|
matthiasm@0
|
678 // Clear buffers, reset stored values, etc
|
matthiasm@0
|
679 frameCount = 0;
|
matthiasm@0
|
680 m_dictID = 0;
|
matthiasm@1
|
681 m_kernelValue.clear();
|
matthiasm@1
|
682 m_kernelFftIndex.clear();
|
matthiasm@1
|
683 m_kernelNoteIndex.clear();
|
matthiasm@0
|
684 }
|
matthiasm@0
|
685
|
matthiasm@0
|
686 NNLSChroma::FeatureSet
|
matthiasm@0
|
687 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
688 {
|
matthiasm@0
|
689 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
690 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix
|
matthiasm@0
|
691
|
matthiasm@0
|
692 frameCount++;
|
matthiasm@0
|
693 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
694
|
matthiasm@0
|
695 Feature f10; // local tuning
|
matthiasm@0
|
696
|
matthiasm@0
|
697 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
698
|
matthiasm@0
|
699 // make magnitude
|
matthiasm@0
|
700 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
701 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
702 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@1
|
703 // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0;
|
matthiasm@0
|
704 }
|
matthiasm@0
|
705
|
matthiasm@0
|
706
|
matthiasm@0
|
707 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
708 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
709 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
710 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
711 }
|
matthiasm@0
|
712 int binCount = 0;
|
matthiasm@0
|
713 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
714 // cerr << ".";
|
matthiasm@1
|
715 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
716 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
717 binCount++;
|
matthiasm@0
|
718 }
|
matthiasm@1
|
719 // cerr << nm[20];
|
matthiasm@1
|
720 // cerr << endl;
|
matthiasm@0
|
721
|
matthiasm@0
|
722
|
matthiasm@0
|
723 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
724 // update means of complex tuning variables
|
matthiasm@0
|
725 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
726 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
727 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
728
|
matthiasm@0
|
729 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
730 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
731 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
732 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@0
|
733 m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0];
|
matthiasm@0
|
734 m_localTuning1 *= 0.99994; m_localTuning1 += nm[iTone + 1];
|
matthiasm@0
|
735 m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2];
|
matthiasm@0
|
736 }
|
matthiasm@0
|
737
|
matthiasm@0
|
738 // if (m_tuneLocal) {
|
matthiasm@0
|
739 // local tuning
|
matthiasm@0
|
740 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
741 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
742 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
743 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
744 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
745 f10.values.push_back(tuning440);
|
matthiasm@0
|
746 // }
|
matthiasm@0
|
747
|
matthiasm@0
|
748 Feature f1; // logfreqspec
|
matthiasm@0
|
749 f1.hasTimestamp = true;
|
matthiasm@0
|
750 f1.timestamp = timestamp;
|
matthiasm@0
|
751 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
752 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
753 }
|
matthiasm@0
|
754
|
matthiasm@0
|
755 FeatureSet fs;
|
matthiasm@0
|
756 fs[1].push_back(f1);
|
matthiasm@0
|
757 fs[10].push_back(f10);
|
matthiasm@0
|
758
|
matthiasm@0
|
759 // deletes
|
matthiasm@0
|
760 delete[] magnitude;
|
matthiasm@0
|
761 delete[] nm;
|
matthiasm@0
|
762
|
matthiasm@0
|
763 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@0
|
764 return fs;
|
matthiasm@0
|
765 }
|
matthiasm@0
|
766
|
matthiasm@0
|
767 NNLSChroma::FeatureSet
|
matthiasm@0
|
768 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
769 {
|
matthiasm@0
|
770 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@0
|
771 FeatureSet fsOut;
|
matthiasm@0
|
772 //
|
matthiasm@1
|
773 /** Calculate Tuning
|
matthiasm@1
|
774 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
775 cumulative mean real and imag values)
|
matthiasm@1
|
776 **/
|
matthiasm@1
|
777 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
778 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
779 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
780 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
781 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
782 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
783
|
matthiasm@1
|
784 char buffer0 [50];
|
matthiasm@1
|
785
|
matthiasm@1
|
786 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
787
|
matthiasm@1
|
788 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
789
|
matthiasm@1
|
790 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
791 Feature f0; // tuning
|
matthiasm@1
|
792 f0.hasTimestamp = true;
|
matthiasm@1
|
793 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
794 f0.label = buffer0;
|
matthiasm@1
|
795 fsOut[0].push_back(f0);
|
matthiasm@1
|
796
|
matthiasm@1
|
797 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
798 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
799 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
800 **/
|
matthiasm@1
|
801
|
matthiasm@1
|
802 float tempValue = 0;
|
matthiasm@1
|
803 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
804 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
805 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
806 int count = 0;
|
matthiasm@1
|
807
|
matthiasm@1
|
808 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
809 Feature f1 = *i;
|
matthiasm@1
|
810 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
811 f2.hasTimestamp = true;
|
matthiasm@1
|
812 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
813 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
814
|
matthiasm@1
|
815 if (m_tuneLocal) {
|
matthiasm@1
|
816 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
817 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
818 }
|
matthiasm@1
|
819
|
matthiasm@1
|
820 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
821
|
matthiasm@1
|
822 for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
823 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
824 f2.values.push_back(tempValue);
|
matthiasm@1
|
825 }
|
matthiasm@1
|
826
|
matthiasm@1
|
827 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
828 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
829 vector<float> runningstd;
|
matthiasm@1
|
830 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
831 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
832 }
|
matthiasm@1
|
833 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
834 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
835 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
836 if (runningstd[i] > 0) {
|
matthiasm@1
|
837 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
838 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
839 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
840 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
841 }
|
matthiasm@1
|
842 if (f2.values[i] < 0) {
|
matthiasm@1
|
843 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
844 }
|
matthiasm@1
|
845 }
|
matthiasm@1
|
846 fsOut[2].push_back(f2);
|
matthiasm@1
|
847 count++;
|
matthiasm@1
|
848 }
|
matthiasm@1
|
849
|
matthiasm@1
|
850 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
851 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
852 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
853 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
854 bass and treble stacked onto each other).
|
matthiasm@1
|
855 **/
|
matthiasm@1
|
856 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
857
|
matthiasm@1
|
858 vector<vector<float> > chordogram;
|
matthiasm@1
|
859 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
860 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
861 count = 0;
|
matthiasm@1
|
862
|
matthiasm@1
|
863 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
864 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
865 Feature f3; // semitone spectrum
|
matthiasm@1
|
866 Feature f4; // treble chromagram
|
matthiasm@1
|
867 Feature f5; // bass chromagram
|
matthiasm@1
|
868 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
869
|
matthiasm@1
|
870 f3.hasTimestamp = true;
|
matthiasm@1
|
871 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
872
|
matthiasm@1
|
873 f4.hasTimestamp = true;
|
matthiasm@1
|
874 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
875
|
matthiasm@1
|
876 f5.hasTimestamp = true;
|
matthiasm@1
|
877 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
878
|
matthiasm@1
|
879 f6.hasTimestamp = true;
|
matthiasm@1
|
880 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
881
|
matthiasm@1
|
882 double b[256];
|
matthiasm@1
|
883
|
matthiasm@1
|
884 bool some_b_greater_zero = false;
|
matthiasm@1
|
885 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
886 b[i] = f2.values[i];
|
matthiasm@1
|
887 if (b[i] > 0) {
|
matthiasm@1
|
888 some_b_greater_zero = true;
|
matthiasm@1
|
889 }
|
matthiasm@1
|
890 }
|
matthiasm@1
|
891
|
matthiasm@1
|
892 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
893
|
matthiasm@1
|
894 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
895 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
896 float currval;
|
matthiasm@1
|
897 unsigned iSemitone = 0;
|
matthiasm@1
|
898
|
matthiasm@1
|
899 if (some_b_greater_zero) {
|
matthiasm@1
|
900 if (m_dictID == 0) {
|
matthiasm@1
|
901 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
902 currval = 0;
|
matthiasm@1
|
903 for (unsigned iBin = 0; iBin < 3; ++iBin) {
|
matthiasm@1
|
904 currval += b[iNote + iBin];
|
matthiasm@1
|
905 }
|
matthiasm@1
|
906 f3.values.push_back(currval);
|
matthiasm@1
|
907 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
908 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
909 iSemitone++;
|
matthiasm@1
|
910 }
|
matthiasm@1
|
911
|
matthiasm@1
|
912 } else {
|
matthiasm@1
|
913 double x[84+1] = {1.0};
|
matthiasm@1
|
914 double rnorm;
|
matthiasm@1
|
915 double w[84+1];
|
matthiasm@1
|
916 double zz[84+1];
|
matthiasm@1
|
917 int indx[84+2];
|
matthiasm@1
|
918 int mode;
|
matthiasm@1
|
919
|
matthiasm@1
|
920 nnls(m_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@1
|
921 }
|
matthiasm@1
|
922 }
|
matthiasm@1
|
923
|
matthiasm@1
|
924 f4.values = chroma;
|
matthiasm@1
|
925 f5.values = basschroma;
|
matthiasm@1
|
926 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
927 f6.values = chroma;
|
matthiasm@1
|
928
|
matthiasm@1
|
929 // local chord estimation
|
matthiasm@1
|
930 vector<float> currentChordSalience;
|
matthiasm@1
|
931 float tempchordvalue = 0;
|
matthiasm@1
|
932 float sumchordvalue = 0;
|
matthiasm@1
|
933 int nChord = nChorddict / 24;
|
matthiasm@1
|
934 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
935 tempchordvalue = 0;
|
matthiasm@1
|
936 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@1
|
937 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
938 }
|
matthiasm@1
|
939 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@1
|
940 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
941 }
|
matthiasm@1
|
942 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
943 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
944 }
|
matthiasm@1
|
945 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
946 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
947 }
|
matthiasm@1
|
948 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
949
|
matthiasm@1
|
950 fsOut[3].push_back(f3);
|
matthiasm@1
|
951 fsOut[4].push_back(f4);
|
matthiasm@1
|
952 fsOut[5].push_back(f5);
|
matthiasm@1
|
953 fsOut[6].push_back(f6);
|
matthiasm@1
|
954 // if (x) free(x);
|
matthiasm@1
|
955 // delete[] b;
|
matthiasm@1
|
956 count++;
|
matthiasm@1
|
957 }
|
matthiasm@0
|
958 // // cerr << m_stepSize << endl<< endl;
|
matthiasm@0
|
959 // count = 0;
|
matthiasm@0
|
960 // int kernelwidth = (49 * 2048) / m_stepSize;
|
matthiasm@0
|
961 // int nChord = nChorddict / 24;
|
matthiasm@0
|
962 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
|
matthiasm@0
|
963 //
|
matthiasm@0
|
964 // /* Simple chord estimation
|
matthiasm@0
|
965 // I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@0
|
966 // take the maximum. Very simple, don't do this at home...
|
matthiasm@0
|
967 // */
|
matthiasm@0
|
968 // vector<int> chordSequence;
|
matthiasm@0
|
969 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@0
|
970 //
|
matthiasm@0
|
971 // int startIndex = max(count - kernelwidth/2 + 1,0);
|
matthiasm@0
|
972 // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1);
|
matthiasm@0
|
973 // vector<float> temp = vector<float>(nChord,0);
|
matthiasm@0
|
974 // for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@0
|
975 // float val = 0;
|
matthiasm@0
|
976 // for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@0
|
977 // val += chordogram[i][iChord] *
|
matthiasm@0
|
978 // (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weigthed sum (triangular window)
|
matthiasm@0
|
979 // }
|
matthiasm@0
|
980 // temp[iChord] = val; // sum
|
matthiasm@0
|
981 // }
|
matthiasm@0
|
982 //
|
matthiasm@0
|
983 // // get maximum for "chord estimate"
|
matthiasm@0
|
984 //
|
matthiasm@0
|
985 // float bestChordValue = 0;
|
matthiasm@0
|
986 // int bestChordIndex = nChord-1; // "no chord" is default
|
matthiasm@0
|
987 // for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@0
|
988 // if (temp[iChord] > bestChordValue) {
|
matthiasm@0
|
989 // bestChordValue = temp[iChord];
|
matthiasm@0
|
990 // bestChordIndex = iChord;
|
matthiasm@0
|
991 // }
|
matthiasm@0
|
992 // }
|
matthiasm@0
|
993 // // cerr << bestChordIndex << endl;
|
matthiasm@0
|
994 // chordSequence.push_back(bestChordIndex);
|
matthiasm@0
|
995 // count++;
|
matthiasm@0
|
996 // }
|
matthiasm@0
|
997 // // mode filter on chordSequence
|
matthiasm@0
|
998 // count = 0;
|
matthiasm@0
|
999 // int oldChordIndex = -1;
|
matthiasm@0
|
1000 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@0
|
1001 // Feature f6 = *it;
|
matthiasm@0
|
1002 // Feature f7; // chord estimate
|
matthiasm@0
|
1003 //
|
matthiasm@0
|
1004 // f7.hasTimestamp = true;
|
matthiasm@0
|
1005 // f7.timestamp = f6.timestamp;
|
matthiasm@0
|
1006 //
|
matthiasm@0
|
1007 // vector<int> chordCount = vector<int>(121,0);
|
matthiasm@0
|
1008 //
|
matthiasm@0
|
1009 // int maxChordCount = 0;
|
matthiasm@0
|
1010 // int maxChordIndex = 120;
|
matthiasm@0
|
1011 // int startIndex = max(count - kernelwidth/2,0);
|
matthiasm@0
|
1012 // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1);
|
matthiasm@0
|
1013 // for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@0
|
1014 // chordCount[chordSequence[i]]++;
|
matthiasm@0
|
1015 // if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@0
|
1016 // maxChordCount++;
|
matthiasm@0
|
1017 // maxChordIndex = chordSequence[i];
|
matthiasm@0
|
1018 // }
|
matthiasm@0
|
1019 // }
|
matthiasm@0
|
1020 // if (oldChordIndex != maxChordIndex) {
|
matthiasm@0
|
1021 // oldChordIndex = maxChordIndex;
|
matthiasm@0
|
1022 //
|
matthiasm@0
|
1023 // char buffer1 [50];
|
matthiasm@0
|
1024 // if (maxChordIndex < nChord - 1) {
|
matthiasm@0
|
1025 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@0
|
1026 // } else {
|
matthiasm@0
|
1027 // sprintf(buffer1, "N");
|
matthiasm@0
|
1028 // }
|
matthiasm@0
|
1029 // f7.label = buffer1;
|
matthiasm@0
|
1030 // fsOut[7].push_back(f7);
|
matthiasm@0
|
1031 // }
|
matthiasm@0
|
1032 // count++;
|
matthiasm@0
|
1033 // }
|
matthiasm@0
|
1034 // // musicity
|
matthiasm@0
|
1035 // count = 0;
|
matthiasm@0
|
1036 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1037 // vector<float> musicityValue;
|
matthiasm@0
|
1038 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1039 // Feature f4 = *it;
|
matthiasm@0
|
1040 //
|
matthiasm@0
|
1041 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1042 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1043 // float chromasum = 0;
|
matthiasm@0
|
1044 // float diffsum = 0;
|
matthiasm@0
|
1045 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1046 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1047 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1048 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1049 // }
|
matthiasm@0
|
1050 // }
|
matthiasm@0
|
1051 // diffsum /= chromasum;
|
matthiasm@0
|
1052 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1053 // count++;
|
matthiasm@0
|
1054 // }
|
matthiasm@0
|
1055 //
|
matthiasm@0
|
1056 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1057 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1058 // musicityThreshold = 0.74;
|
matthiasm@0
|
1059 // }
|
matthiasm@0
|
1060 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1061 // musicityThreshold = 0.77;
|
matthiasm@0
|
1062 // }
|
matthiasm@0
|
1063 //
|
matthiasm@0
|
1064 // count = 0;
|
matthiasm@0
|
1065 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1066 // Feature f4 = *it;
|
matthiasm@0
|
1067 // Feature f8; // musicity
|
matthiasm@0
|
1068 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1069 //
|
matthiasm@0
|
1070 // f8.hasTimestamp = true;
|
matthiasm@0
|
1071 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1072 // f9.hasTimestamp = true;
|
matthiasm@0
|
1073 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1074 //
|
matthiasm@0
|
1075 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1076 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1077 // int musicityCount = 0;
|
matthiasm@0
|
1078 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1079 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1080 // }
|
matthiasm@0
|
1081 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1082 //
|
matthiasm@0
|
1083 // if (isSpeech) {
|
matthiasm@0
|
1084 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1085 // f9.label = "Speech";
|
matthiasm@0
|
1086 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1087 // oldlabeltype = 2;
|
matthiasm@0
|
1088 // }
|
matthiasm@0
|
1089 // } else {
|
matthiasm@0
|
1090 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1091 // f9.label = "Music";
|
matthiasm@0
|
1092 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1093 // oldlabeltype = 1;
|
matthiasm@0
|
1094 // }
|
matthiasm@0
|
1095 // }
|
matthiasm@0
|
1096 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1097 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1098 // count++;
|
matthiasm@0
|
1099 // }
|
matthiasm@0
|
1100 return fsOut;
|
matthiasm@0
|
1101
|
matthiasm@0
|
1102 }
|
matthiasm@0
|
1103
|