matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@10
|
4 // #include <omp.h>
|
matthiasm@0
|
5 #include <list>
|
matthiasm@0
|
6 #include <iostream>
|
matthiasm@3
|
7 #include <fstream>
|
matthiasm@0
|
8 #include <sstream>
|
matthiasm@0
|
9 #include <cassert>
|
matthiasm@7
|
10 #include <cstdlib>
|
matthiasm@0
|
11 #include <cstdio>
|
matthiasm@7
|
12 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
13 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
14 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
15 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
16 #include "nnls.h"
|
matthiasm@0
|
17 #include "chorddict.cpp"
|
matthiasm@9
|
18
|
matthiasm@10
|
19 // #include <omp.h>
|
matthiasm@10
|
20 // #define N 1000
|
matthiasm@10
|
21 // #define CHUNKSIZE 100
|
matthiasm@9
|
22
|
matthiasm@9
|
23
|
matthiasm@0
|
24 using namespace std;
|
matthiasm@7
|
25 using namespace boost;
|
matthiasm@0
|
26
|
matthiasm@0
|
// Fixed sine/cosine pair; the values correspond to an angle of 120 degrees.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// 19-tap symmetric smoothing window (peak 0.1020 at the centre tap).
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};
|
matthiasm@0
|
// Weighting window for the bass range: 84 entries, rising to ~1 at index 18,
// falling to zero by index 37 and staying zero for the remaining entries.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311,
    0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498,
    0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612,
    0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};
|
matthiasm@0
|
// Weighting window for the treble range: 84 entries, symmetric about the
// centre (entries 41 and 42 hold the maximum 0.999650).
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856,
    0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956,
    0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058,
    0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281,
    0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410,
    0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
    0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};
|
matthiasm@0
|
// Display names of the 24 chroma bins: entries 0-11 are the bass pitch
// classes, entries 12-23 the regular pitch classes, both starting at A.
const char* notenames[24] = {
    // bass chroma
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    // chroma
    "A", "Bb", "B", "C", "C#", "D",
    "Eb", "E", "F", "F#", "G", "Ab"
};
|
matthiasm@7
|
34
|
matthiasm@7
|
// Spelling of a slash-chord bass note. The first index is the chord root
// (same order as the chroma names, starting at A); the second index is the
// position of the bass note in semitones above the root. Positions 1, 6 and
// 8 have no spelling and are left empty.
const char* bassnames[12][12] = {
    /* A  */ {"A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#"},
    /* Bb */ {"Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"},
    /* B  */ {"B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#"},
    /* C  */ {"C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"},
    /* C# */ {"C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#"},
    /* D  */ {"D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#"},
    /* Eb */ {"Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"},
    /* E  */ {"E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#"},
    /* F  */ {"F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"},
    /* F# */ {"F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#"},
    /* G  */ {"G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#"},
    /* Ab */ {"Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"}
};
|
matthiasm@17
|
49
|
matthiasm@17
|
50
|
matthiasm@17
|
51 // const char* bassnames[12][12] ={
|
matthiasm@17
|
52 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
53 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
54 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
55 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
56 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
57 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
58 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
59 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
60 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
61 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
62 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
63 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
64 // };
|
matthiasm@17
|
65
|
matthiasm@0
|
66 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
67 const int nNote = 256;
|
matthiasm@0
|
68
|
matthiasm@0
|
69 /** Special Convolution
|
matthiasm@0
|
70 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
71 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
72 as the first (last) valid convolution bin.
|
matthiasm@0
|
73 **/
|
matthiasm@0
|
74
|
matthiasm@0
|
// Compile-time switch for the "--> functionName" trace messages on stderr.
static const bool debug_on = false;
|
matthiasm@0
|
76
|
matthiasm@0
|
/**
 * Convolve `convolvee` with an odd-length `kernel`, replicating the border
 * values.
 *
 * The centre part of the result holds the ordinary "valid" convolution
 * (shifted by half a kernel so it lines up with the input); the first and
 * last lenKernel/2 bins are filled with the first and last valid value
 * respectively.
 *
 * @param convolvee signal to smooth
 * @param kernel    smoothing kernel; its length is expected to be odd
 *                  (checked by assert only)
 * @return vector of max(256, convolvee.size()) bins. 256 is the historical
 *         fixed output length and is kept as a minimum so existing callers
 *         see the same shape; inputs longer than that no longer write past
 *         the end of the result. If the input is shorter than the kernel
 *         (no valid bin exists) or the kernel is empty, all bins are zero.
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    const int legacyLength = 256;
    const int lenOut = (lenConvolvee > legacyLength) ? lenConvolvee : legacyLength;
    std::vector<float> Z(lenOut, 0);

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // No position where the kernel fully overlaps the input: nothing to
    // compute and nothing to replicate into the pads.
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core of the convolution
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; ++n) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
104
|
matthiasm@0
|
105 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
106 // {
|
matthiasm@0
|
107 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
108 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
109 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
110 // }
|
matthiasm@0
|
111 // return freq;
|
matthiasm@0
|
112 // }
|
matthiasm@0
|
113
|
matthiasm@0
|
/**
 * Raised-cosine pulse of total width `width` centred on `centre`.
 *
 * @return 1 at the centre, falling to 0 at centre +/- width/2, and 0
 *         everywhere outside that interval.
 */
float cospuls(float x, float centre, float width)
{
    const float offset = x - centre;

    // outside the support of the pulse
    if (!(std::abs(offset) <= 0.5 * width)) {
        return 0.0;
    }

    const float recipwidth = 1.0/width;
    return std::cos(offset*2*M_PI*recipwidth)*.5+.5;
}
|
matthiasm@0
|
122
|
matthiasm@0
|
123 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
124 {
|
matthiasm@0
|
125 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
126 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
127 // now scale to correct for note density
|
matthiasm@0
|
128 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
129 if (x > 0) {
|
matthiasm@0
|
130 out = out / (c * x);
|
matthiasm@0
|
131 } else {
|
matthiasm@0
|
132 out = 0;
|
matthiasm@0
|
133 }
|
matthiasm@0
|
134 return out;
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
matthiasm@0
|
137 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
138
|
matthiasm@0
|
139 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
140 int minoctave = 0; // this must be 0
|
matthiasm@0
|
141 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
142 int oversampling = 80;
|
matthiasm@0
|
143
|
matthiasm@0
|
144 // linear frequency vector
|
matthiasm@0
|
145 vector<float> fft_f;
|
matthiasm@0
|
146 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
147 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
148 }
|
matthiasm@0
|
149 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
150
|
matthiasm@0
|
151 // linear oversampled frequency vector
|
matthiasm@0
|
152 vector<float> oversampled_f;
|
matthiasm@0
|
153 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
154 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
155 }
|
matthiasm@0
|
156
|
matthiasm@0
|
157 // pitch-spaced frequency vector
|
matthiasm@0
|
158 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
159 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
160 vector<float> cq_f;
|
matthiasm@0
|
161 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
162 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
164 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
165 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
166 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
167 }
|
matthiasm@0
|
168 }
|
matthiasm@0
|
169 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
170 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
171
|
matthiasm@0
|
172 int nFFT = fft_f.size();
|
matthiasm@0
|
173
|
matthiasm@0
|
174 vector<float> fft_activation;
|
matthiasm@0
|
175 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
176 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
177 fft_activation.push_back(cosp);
|
matthiasm@0
|
178 // cerr << cosp << endl;
|
matthiasm@0
|
179 }
|
matthiasm@0
|
180
|
matthiasm@0
|
181 float cq_activation;
|
matthiasm@0
|
182 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
183 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
184 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
185 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
186 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
187 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
188 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
189 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
190 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
191 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
192 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
193 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
194 }
|
matthiasm@0
|
195 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
196 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
197 // }
|
matthiasm@0
|
198 }
|
matthiasm@0
|
199 }
|
matthiasm@0
|
200 }
|
matthiasm@0
|
201 return true;
|
matthiasm@0
|
202 }
|
matthiasm@0
|
203
|
matthiasm@17
|
204 void dictionaryMatrix(float* dm) {
|
matthiasm@1
|
205 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
206 int minoctave = 0; // this must be 0
|
matthiasm@1
|
207 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
208 float s_param = 0.7;
|
matthiasm@1
|
209
|
matthiasm@1
|
210 // pitch-spaced frequency vector
|
matthiasm@1
|
211 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
212 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
213 vector<float> cq_f;
|
matthiasm@1
|
214 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
215 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
217 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
218 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
219 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
220 }
|
matthiasm@1
|
221 }
|
matthiasm@1
|
222 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
223 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
224
|
matthiasm@1
|
225 float curr_f;
|
matthiasm@1
|
226 float floatbin;
|
matthiasm@1
|
227 float curr_amp;
|
matthiasm@1
|
228 // now for every combination calculate the matrix element
|
matthiasm@1
|
229 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
230 // cerr << iOut << endl;
|
matthiasm@1
|
231 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
232 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
233 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
234 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
235 // cerr << floatbin << endl;
|
matthiasm@1
|
236 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
237 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
238 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
239 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
240 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
241 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
242 }
|
matthiasm@1
|
243 }
|
matthiasm@3
|
244 }
|
matthiasm@1
|
245 }
|
matthiasm@3
|
246
|
matthiasm@3
|
247
|
matthiasm@1
|
248 }
|
matthiasm@1
|
249
|
matthiasm@7
|
/**
 * Look up an environment variable.
 *
 * @param key name of the variable
 * @return its value, or an empty string if the variable is not set.
 */
std::string get_env_var( std::string const & key ) {
    const char * const raw = std::getenv( key.c_str() );
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
|
matthiasm@7
|
259
|
matthiasm@7
|
260
|
matthiasm@9
|
261 vector<string> chordDictionary(vector<float> *mchorddict) {
|
matthiasm@7
|
262 // ifstream chordDictFile;
|
matthiasm@7
|
263 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
matthiasm@7
|
264 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
matthiasm@7
|
265 typedef tokenizer<char_separator<char> > Tok;
|
matthiasm@7
|
266 // char_separator<char> sep; // default constructed
|
matthiasm@17
|
267 char_separator<char> sep(",; ","=");
|
matthiasm@7
|
268 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
269 string line;
|
matthiasm@7
|
270 int iElement = 0;
|
matthiasm@7
|
271 int nChord = 0;
|
matthiasm@7
|
272
|
matthiasm@7
|
273 vector<string> loadedChordNames;
|
matthiasm@7
|
274 vector<float> loadedChordDict;
|
matthiasm@7
|
275 if (chordDictFile.is_open()) {
|
matthiasm@7
|
276 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
matthiasm@7
|
277 // first, get the chord definition
|
matthiasm@7
|
278 string chordType;
|
matthiasm@7
|
279 vector<float> tempPCVector;
|
matthiasm@7
|
280 // cerr << line << endl;
|
matthiasm@7
|
281 if (!line.empty() && line.substr(0,1) != "#") {
|
matthiasm@7
|
282 Tok tok(line, sep);
|
matthiasm@7
|
283 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
matthiasm@7
|
284 string tempString = *tok_iter;
|
matthiasm@7
|
285 // cerr << tempString << endl;
|
matthiasm@7
|
286 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
matthiasm@17
|
287 if (tempString == "=") {
|
matthiasm@7
|
288 chordType = "";
|
matthiasm@7
|
289 } else {
|
matthiasm@7
|
290 chordType = tempString;
|
matthiasm@7
|
291 tok_iter++; // is this cheating ? :)
|
matthiasm@7
|
292 }
|
matthiasm@7
|
293 } else {
|
matthiasm@7
|
294 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
matthiasm@7
|
295 }
|
matthiasm@7
|
296 }
|
matthiasm@7
|
297
|
matthiasm@7
|
298 // now make all 12 chords of every type
|
matthiasm@7
|
299 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
matthiasm@7
|
300 // add bass slash notation
|
matthiasm@7
|
301 string slashNotation = "";
|
matthiasm@7
|
302 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
matthiasm@7
|
303 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
matthiasm@7
|
304 slashNotation = bassnames[iSemitone][kSemitone];
|
matthiasm@7
|
305 }
|
matthiasm@7
|
306 }
|
matthiasm@7
|
307 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
matthiasm@9
|
308 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
matthiasm@9
|
309 float bassValue = 0;
|
matthiasm@9
|
310 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
|
matthiasm@9
|
311 bassValue = 1;
|
matthiasm@9
|
312 } else {
|
matthiasm@10
|
313 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
|
matthiasm@9
|
314 }
|
matthiasm@9
|
315 loadedChordDict.push_back(bassValue);
|
matthiasm@7
|
316 }
|
matthiasm@7
|
317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
matthiasm@7
|
318 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
319 }
|
matthiasm@7
|
320 ostringstream os;
|
matthiasm@7
|
321 if (slashNotation.empty()) {
|
matthiasm@7
|
322 os << notenames[12+iSemitone] << chordType;
|
matthiasm@7
|
323 } else {
|
matthiasm@7
|
324 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
matthiasm@7
|
325 }
|
matthiasm@17
|
326 // cerr << os.str() << endl;
|
matthiasm@7
|
327 loadedChordNames.push_back(os.str());
|
matthiasm@7
|
328 }
|
matthiasm@7
|
329 }
|
matthiasm@7
|
330 }
|
matthiasm@7
|
331 // N type
|
matthiasm@7
|
332 loadedChordNames.push_back("N");
|
matthiasm@7
|
333 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
matthiasm@7
|
334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
335
|
matthiasm@7
|
336 // normalise
|
matthiasm@7
|
337 float sum = 0;
|
matthiasm@7
|
338 for (int i = 0; i < loadedChordDict.size(); i++) {
|
matthiasm@7
|
339 sum += pow(loadedChordDict[i],2);
|
matthiasm@7
|
340 if (i % 24 == 23) {
|
matthiasm@7
|
341 float invertedsum = 1.0/sqrt(sum);
|
matthiasm@7
|
342 for (int k = 0; k < 24; k++) {
|
matthiasm@7
|
343 loadedChordDict[i-k] *= invertedsum;
|
matthiasm@7
|
344 }
|
matthiasm@7
|
345 sum = 0;
|
matthiasm@7
|
346 }
|
matthiasm@7
|
347
|
matthiasm@7
|
348 }
|
matthiasm@7
|
349
|
matthiasm@7
|
350
|
matthiasm@7
|
351 nChord = 0;
|
matthiasm@7
|
352 for (int i = 0; i < loadedChordNames.size(); i++) {
|
matthiasm@7
|
353 nChord++;
|
matthiasm@7
|
354 }
|
matthiasm@7
|
355 chordDictFile.close();
|
matthiasm@7
|
356
|
matthiasm@7
|
357
|
matthiasm@9
|
358 // mchorddict = new float[nChord*24];
|
matthiasm@7
|
359 for (int i = 0; i < nChord*24; i++) {
|
matthiasm@9
|
360 mchorddict->push_back(loadedChordDict[i]);
|
matthiasm@7
|
361 }
|
matthiasm@9
|
362
|
matthiasm@7
|
363 } else {// use default from chorddict.cpp
|
matthiasm@9
|
364 // mchorddict = new float[nChorddict];
|
matthiasm@7
|
365 for (int i = 0; i < nChorddict; i++) {
|
matthiasm@9
|
366 mchorddict->push_back(chorddict[i]);
|
matthiasm@7
|
367 }
|
matthiasm@7
|
368
|
matthiasm@7
|
369 nChord = nChorddict/24;
|
matthiasm@7
|
370 // mchordnames = new string[nChorddict/24];
|
matthiasm@7
|
371 char buffer1 [50];
|
matthiasm@7
|
372 for (int i = 0; i < nChorddict/24; i++) {
|
matthiasm@7
|
373 if (i < nChorddict/24 - 1) {
|
matthiasm@7
|
374 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
matthiasm@7
|
375 } else {
|
matthiasm@7
|
376 sprintf(buffer1, "N");
|
matthiasm@7
|
377 }
|
matthiasm@7
|
378 ostringstream os;
|
matthiasm@7
|
379 os << buffer1;
|
matthiasm@9
|
380 loadedChordNames.push_back(os.str());
|
matthiasm@9
|
381
|
matthiasm@7
|
382 }
|
matthiasm@7
|
383
|
matthiasm@7
|
384 }
|
matthiasm@9
|
385 // cerr << "before leaving" << chordnames[1] << endl;
|
matthiasm@9
|
386 return loadedChordNames;
|
matthiasm@7
|
387 }
|
matthiasm@0
|
388
|
matthiasm@0
|
389 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
390 Plugin(inputSampleRate),
|
matthiasm@0
|
391 m_fl(0),
|
matthiasm@0
|
392 m_blockSize(0),
|
matthiasm@0
|
393 m_stepSize(0),
|
matthiasm@0
|
394 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
395 m_meanTuning0(0),
|
matthiasm@0
|
396 m_meanTuning1(0),
|
matthiasm@0
|
397 m_meanTuning2(0),
|
matthiasm@0
|
398 m_localTuning0(0),
|
matthiasm@0
|
399 m_localTuning1(0),
|
matthiasm@0
|
400 m_localTuning2(0),
|
matthiasm@4
|
401 m_paling(1.0),
|
matthiasm@3
|
402 m_preset(0.0),
|
matthiasm@0
|
403 m_localTuning(0),
|
matthiasm@0
|
404 m_kernelValue(0),
|
matthiasm@0
|
405 m_kernelFftIndex(0),
|
matthiasm@0
|
406 m_kernelNoteIndex(0),
|
matthiasm@1
|
407 m_dict(0),
|
matthiasm@0
|
408 m_tuneLocal(false),
|
matthiasm@7
|
409 m_dictID(0),
|
matthiasm@7
|
410 m_chorddict(0),
|
matthiasm@12
|
411 m_chordnames(0),
|
matthiasm@17
|
412 m_doNormalizeChroma(0),
|
matthiasm@17
|
413 m_rollon(0.01)
|
matthiasm@0
|
414 {
|
matthiasm@0
|
415 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
416
|
matthiasm@7
|
417 // make the *note* dictionary matrix
|
matthiasm@3
|
418 m_dict = new float[nNote * 84];
|
matthiasm@3
|
419 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
420 dictionaryMatrix(m_dict);
|
matthiasm@7
|
421
|
matthiasm@7
|
422 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@9
|
423 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
424 }
|
matthiasm@0
|
425
|
matthiasm@0
|
426
|
matthiasm@0
|
427 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
428 {
|
matthiasm@0
|
429 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
430 delete [] m_dict;
|
matthiasm@9
|
431 // delete [] m_chorddict;
|
matthiasm@7
|
432 // delete m_chordnames;
|
matthiasm@0
|
433 }
|
matthiasm@0
|
434
|
matthiasm@0
|
435 string
|
matthiasm@0
|
436 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
437 {
|
matthiasm@0
|
438 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
439 return "nnls_chroma";
|
matthiasm@0
|
440 }
|
matthiasm@0
|
441
|
matthiasm@0
|
442 string
|
matthiasm@0
|
443 NNLSChroma::getName() const
|
matthiasm@0
|
444 {
|
matthiasm@0
|
445 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
446 return "NNLS Chroma";
|
matthiasm@0
|
447 }
|
matthiasm@0
|
448
|
matthiasm@0
|
449 string
|
matthiasm@0
|
450 NNLSChroma::getDescription() const
|
matthiasm@0
|
451 {
|
matthiasm@0
|
452 // Return something helpful here!
|
matthiasm@0
|
453 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@13
|
454 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
|
matthiasm@0
|
455 }
|
matthiasm@0
|
456
|
matthiasm@0
|
457 string
|
matthiasm@0
|
458 NNLSChroma::getMaker() const
|
matthiasm@0
|
459 {
|
matthiasm@0
|
460 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
461 // Your name here
|
matthiasm@0
|
462 return "Matthias Mauch";
|
matthiasm@0
|
463 }
|
matthiasm@0
|
464
|
matthiasm@0
|
465 int
|
matthiasm@0
|
466 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
467 {
|
matthiasm@0
|
468 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
469 // Increment this each time you release a version that behaves
|
matthiasm@0
|
470 // differently from the previous one
|
matthiasm@0
|
471 return 1;
|
matthiasm@0
|
472 }
|
matthiasm@0
|
473
|
matthiasm@0
|
474 string
|
matthiasm@0
|
475 NNLSChroma::getCopyright() const
|
matthiasm@0
|
476 {
|
matthiasm@0
|
477 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
478 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
479 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
480 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
481 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
482 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
483 }
|
matthiasm@0
|
484
|
matthiasm@0
|
485 NNLSChroma::InputDomain
|
matthiasm@0
|
486 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
487 {
|
matthiasm@0
|
488 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
489 return FrequencyDomain;
|
matthiasm@0
|
490 }
|
matthiasm@0
|
491
|
matthiasm@0
|
492 size_t
|
matthiasm@0
|
493 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
494 {
|
matthiasm@0
|
495 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
496 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
497 }
|
matthiasm@0
|
498
|
matthiasm@0
|
499 size_t
|
matthiasm@0
|
500 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
501 {
|
matthiasm@0
|
502 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
503 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
504 // means the same as the block size for TimeDomain
|
matthiasm@0
|
505 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
506 }
|
matthiasm@0
|
507
|
matthiasm@0
|
508 size_t
|
matthiasm@0
|
509 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
510 {
|
matthiasm@0
|
511 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
512 return 1;
|
matthiasm@0
|
513 }
|
matthiasm@0
|
514
|
matthiasm@0
|
515 size_t
|
matthiasm@0
|
516 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
517 {
|
matthiasm@0
|
518 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
519 return 1;
|
matthiasm@0
|
520 }
|
matthiasm@0
|
521
|
matthiasm@0
|
522 NNLSChroma::ParameterList
|
matthiasm@0
|
523 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
524 {
|
matthiasm@0
|
525 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
526 ParameterList list;
|
matthiasm@0
|
527
|
matthiasm@3
|
528 ParameterDescriptor d3;
|
matthiasm@3
|
529 d3.identifier = "preset";
|
matthiasm@3
|
530 d3.name = "preset";
|
matthiasm@3
|
531 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
532 d3.unit = "";
|
matthiasm@3
|
533 d3.isQuantized = true;
|
matthiasm@3
|
534 d3.quantizeStep = 1;
|
matthiasm@3
|
535 d3.minValue = 0.0;
|
matthiasm@4
|
536 d3.maxValue = 3.0;
|
matthiasm@3
|
537 d3.defaultValue = 0.0;
|
matthiasm@3
|
538 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
539 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
540 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
541 d3.valueNames.push_back("manual");
|
matthiasm@3
|
542 list.push_back(d3);
|
matthiasm@4
|
543
|
matthiasm@17
|
544 ParameterDescriptor d5;
|
matthiasm@17
|
545 d5.identifier = "rollon";
|
matthiasm@17
|
546 d5.name = "spectral roll-on";
|
matthiasm@17
|
547 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
matthiasm@17
|
548 d5.unit = "";
|
matthiasm@17
|
549 d5.minValue = 0;
|
matthiasm@17
|
550 d5.maxValue = 1;
|
matthiasm@17
|
551 d5.defaultValue = 0;
|
matthiasm@17
|
552 d5.isQuantized = false;
|
matthiasm@17
|
553 list.push_back(d5);
|
matthiasm@17
|
554
|
matthiasm@4
|
555 // ParameterDescriptor d0;
|
matthiasm@4
|
556 // d0.identifier = "notedict";
|
matthiasm@4
|
557 // d0.name = "note dictionary";
|
matthiasm@4
|
558 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
559 // d0.unit = "";
|
matthiasm@4
|
560 // d0.minValue = 0;
|
matthiasm@4
|
561 // d0.maxValue = 1;
|
matthiasm@4
|
562 // d0.defaultValue = 0;
|
matthiasm@4
|
563 // d0.isQuantized = true;
|
matthiasm@4
|
564 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
565 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
566 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
567 // list.push_back(d0);
|
matthiasm@4
|
568
|
matthiasm@4
|
569 ParameterDescriptor d1;
|
matthiasm@4
|
570 d1.identifier = "tuningmode";
|
matthiasm@4
|
571 d1.name = "tuning mode";
|
matthiasm@4
|
572 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
573 d1.unit = "";
|
matthiasm@4
|
574 d1.minValue = 0;
|
matthiasm@4
|
575 d1.maxValue = 1;
|
matthiasm@4
|
576 d1.defaultValue = 0;
|
matthiasm@4
|
577 d1.isQuantized = true;
|
matthiasm@4
|
578 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
579 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
580 d1.quantizeStep = 1.0;
|
matthiasm@4
|
581 list.push_back(d1);
|
matthiasm@4
|
582
|
matthiasm@4
|
583 // ParameterDescriptor d2;
|
matthiasm@4
|
584 // d2.identifier = "paling";
|
matthiasm@4
|
585 // d2.name = "spectral paling";
|
matthiasm@4
|
586 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
587 // d2.unit = "";
|
matthiasm@4
|
588 // d2.isQuantized = true;
|
matthiasm@4
|
589 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
590 // d2.minValue = 0.0;
|
matthiasm@4
|
591 // d2.maxValue = 1.0;
|
matthiasm@4
|
592 // d2.defaultValue = 1.0;
|
matthiasm@4
|
593 // d2.isQuantized = false;
|
matthiasm@4
|
594 // list.push_back(d2);
|
matthiasm@12
|
595 ParameterDescriptor d4;
|
matthiasm@12
|
596 d4.identifier = "chromanormalize";
|
matthiasm@12
|
597 d4.name = "chroma normalization";
|
matthiasm@12
|
598 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
599 d4.unit = "";
|
matthiasm@12
|
600 d4.minValue = 0;
|
matthiasm@13
|
601 d4.maxValue = 3;
|
matthiasm@12
|
602 d4.defaultValue = 0;
|
matthiasm@12
|
603 d4.isQuantized = true;
|
matthiasm@13
|
604 d4.valueNames.push_back("none");
|
matthiasm@13
|
605 d4.valueNames.push_back("maximum norm");
|
matthiasm@13
|
606 d4.valueNames.push_back("L1 norm");
|
matthiasm@13
|
607 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
608 d4.quantizeStep = 1.0;
|
matthiasm@12
|
609 list.push_back(d4);
|
matthiasm@4
|
610
|
matthiasm@0
|
611 return list;
|
matthiasm@0
|
612 }
|
matthiasm@0
|
613
|
matthiasm@0
|
614 float
|
matthiasm@0
|
615 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
616 {
|
matthiasm@3
|
617 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
618 if (identifier == "notedict") {
|
matthiasm@0
|
619 return m_dictID;
|
matthiasm@0
|
620 }
|
matthiasm@0
|
621
|
matthiasm@0
|
622 if (identifier == "paling") {
|
matthiasm@0
|
623 return m_paling;
|
matthiasm@0
|
624 }
|
matthiasm@17
|
625
|
matthiasm@17
|
626 if (identifier == "rollon") {
|
matthiasm@17
|
627 return m_rollon;
|
matthiasm@17
|
628 }
|
matthiasm@0
|
629
|
matthiasm@0
|
630 if (identifier == "tuningmode") {
|
matthiasm@0
|
631 if (m_tuneLocal) {
|
matthiasm@0
|
632 return 1.0;
|
matthiasm@0
|
633 } else {
|
matthiasm@0
|
634 return 0.0;
|
matthiasm@0
|
635 }
|
matthiasm@0
|
636 }
|
matthiasm@3
|
637 if (identifier == "preset") {
|
matthiasm@3
|
638 return m_preset;
|
matthiasm@3
|
639 }
|
matthiasm@12
|
640 if (identifier == "chromanormalize") {
|
matthiasm@12
|
641 return m_doNormalizeChroma;
|
matthiasm@12
|
642 }
|
matthiasm@0
|
643 return 0;
|
matthiasm@0
|
644
|
matthiasm@0
|
645 }
|
matthiasm@0
|
646
|
matthiasm@0
|
647 void
|
matthiasm@0
|
648 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
649 {
|
matthiasm@3
|
650 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
651 if (identifier == "notedict") {
|
matthiasm@0
|
652 m_dictID = (int) value;
|
matthiasm@0
|
653 }
|
matthiasm@0
|
654
|
matthiasm@0
|
655 if (identifier == "paling") {
|
matthiasm@0
|
656 m_paling = value;
|
matthiasm@0
|
657 }
|
matthiasm@0
|
658
|
matthiasm@0
|
659 if (identifier == "tuningmode") {
|
matthiasm@0
|
660 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
661 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
662 }
|
matthiasm@3
|
663 if (identifier == "preset") {
|
matthiasm@3
|
664 m_preset = value;
|
matthiasm@3
|
665 if (m_preset == 0.0) {
|
matthiasm@3
|
666 m_tuneLocal = false;
|
matthiasm@3
|
667 m_paling = 1.0;
|
matthiasm@3
|
668 m_dictID = 0.0;
|
matthiasm@3
|
669 }
|
matthiasm@3
|
670 if (m_preset == 1.0) {
|
matthiasm@3
|
671 m_tuneLocal = false;
|
matthiasm@3
|
672 m_paling = 1.0;
|
matthiasm@3
|
673 m_dictID = 1.0;
|
matthiasm@3
|
674 }
|
matthiasm@3
|
675 if (m_preset == 2.0) {
|
matthiasm@3
|
676 m_tuneLocal = false;
|
matthiasm@3
|
677 m_paling = 0.7;
|
matthiasm@3
|
678 m_dictID = 0.0;
|
matthiasm@3
|
679 }
|
matthiasm@3
|
680 }
|
matthiasm@12
|
681 if (identifier == "chromanormalize") {
|
matthiasm@12
|
682 m_doNormalizeChroma = value;
|
matthiasm@12
|
683 }
|
matthiasm@17
|
684
|
matthiasm@17
|
685 if (identifier == "rollon") {
|
matthiasm@17
|
686 m_rollon = value;
|
matthiasm@17
|
687 }
|
matthiasm@0
|
688 }
|
matthiasm@0
|
689
|
matthiasm@0
|
690 NNLSChroma::ProgramList
|
matthiasm@0
|
691 NNLSChroma::getPrograms() const
|
matthiasm@0
|
692 {
|
matthiasm@0
|
693 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
694 ProgramList list;
|
matthiasm@0
|
695
|
matthiasm@0
|
696 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
697 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
698
|
matthiasm@0
|
699 return list;
|
matthiasm@0
|
700 }
|
matthiasm@0
|
701
|
matthiasm@0
|
702 string
|
matthiasm@0
|
703 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
704 {
|
matthiasm@0
|
705 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
706 return ""; // no programs
|
matthiasm@0
|
707 }
|
matthiasm@0
|
708
|
matthiasm@0
|
709 void
|
matthiasm@0
|
710 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
711 {
|
matthiasm@0
|
712 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
713 }
|
matthiasm@0
|
714
|
matthiasm@0
|
715
|
matthiasm@0
|
716 NNLSChroma::OutputList
|
matthiasm@0
|
717 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
718 {
|
matthiasm@0
|
719 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
720 OutputList list;
|
matthiasm@0
|
721
|
matthiasm@0
|
722 // Make chroma names for the binNames property
|
matthiasm@0
|
723 vector<string> chromanames;
|
matthiasm@0
|
724 vector<string> bothchromanames;
|
matthiasm@0
|
725 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
726 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
727 if (iNote < 12) {
|
matthiasm@0
|
728 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
729 }
|
matthiasm@0
|
730 }
|
matthiasm@0
|
731
|
matthiasm@1
|
732 // int nNote = 84;
|
matthiasm@0
|
733
|
matthiasm@0
|
734 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
735 // Every plugin must have at least one output.
|
matthiasm@0
|
736
|
matthiasm@0
|
737 OutputDescriptor d0;
|
matthiasm@0
|
738 d0.identifier = "tuning";
|
matthiasm@0
|
739 d0.name = "Tuning";
|
matthiasm@0
|
740 d0.description = "The concert pitch.";
|
matthiasm@0
|
741 d0.unit = "Hz";
|
matthiasm@0
|
742 d0.hasFixedBinCount = true;
|
matthiasm@0
|
743 d0.binCount = 0;
|
matthiasm@0
|
744 d0.hasKnownExtents = true;
|
matthiasm@0
|
745 d0.minValue = 427.47;
|
matthiasm@0
|
746 d0.maxValue = 452.89;
|
matthiasm@0
|
747 d0.isQuantized = false;
|
matthiasm@0
|
748 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
749 d0.hasDuration = false;
|
matthiasm@0
|
750 list.push_back(d0);
|
matthiasm@0
|
751
|
matthiasm@0
|
752 OutputDescriptor d1;
|
matthiasm@0
|
753 d1.identifier = "logfreqspec";
|
matthiasm@0
|
754 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
755 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
756 d1.unit = "";
|
matthiasm@0
|
757 d1.hasFixedBinCount = true;
|
matthiasm@0
|
758 d1.binCount = nNote;
|
matthiasm@0
|
759 d1.hasKnownExtents = false;
|
matthiasm@0
|
760 d1.isQuantized = false;
|
matthiasm@0
|
761 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
762 d1.hasDuration = false;
|
matthiasm@0
|
763 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
764 list.push_back(d1);
|
matthiasm@0
|
765
|
matthiasm@0
|
766 OutputDescriptor d2;
|
matthiasm@0
|
767 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
768 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
769 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
770 d2.unit = "";
|
matthiasm@0
|
771 d2.hasFixedBinCount = true;
|
matthiasm@0
|
772 d2.binCount = 256;
|
matthiasm@0
|
773 d2.hasKnownExtents = false;
|
matthiasm@0
|
774 d2.isQuantized = false;
|
matthiasm@0
|
775 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
776 d2.hasDuration = false;
|
matthiasm@0
|
777 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
778 list.push_back(d2);
|
matthiasm@0
|
779
|
matthiasm@0
|
780 OutputDescriptor d3;
|
matthiasm@0
|
781 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
782 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
783 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
784 d3.unit = "";
|
matthiasm@0
|
785 d3.hasFixedBinCount = true;
|
matthiasm@0
|
786 d3.binCount = 84;
|
matthiasm@0
|
787 d3.hasKnownExtents = false;
|
matthiasm@0
|
788 d3.isQuantized = false;
|
matthiasm@0
|
789 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
790 d3.hasDuration = false;
|
matthiasm@0
|
791 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
792 list.push_back(d3);
|
matthiasm@0
|
793
|
matthiasm@0
|
794 OutputDescriptor d4;
|
matthiasm@0
|
795 d4.identifier = "chroma";
|
matthiasm@0
|
796 d4.name = "Chromagram";
|
matthiasm@0
|
797 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
798 d4.unit = "";
|
matthiasm@0
|
799 d4.hasFixedBinCount = true;
|
matthiasm@0
|
800 d4.binCount = 12;
|
matthiasm@0
|
801 d4.binNames = chromanames;
|
matthiasm@0
|
802 d4.hasKnownExtents = false;
|
matthiasm@0
|
803 d4.isQuantized = false;
|
matthiasm@0
|
804 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
805 d4.hasDuration = false;
|
matthiasm@0
|
806 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
807 list.push_back(d4);
|
matthiasm@0
|
808
|
matthiasm@0
|
809 OutputDescriptor d5;
|
matthiasm@0
|
810 d5.identifier = "basschroma";
|
matthiasm@0
|
811 d5.name = "Bass Chromagram";
|
matthiasm@0
|
812 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
813 d5.unit = "";
|
matthiasm@0
|
814 d5.hasFixedBinCount = true;
|
matthiasm@0
|
815 d5.binCount = 12;
|
matthiasm@0
|
816 d5.binNames = chromanames;
|
matthiasm@0
|
817 d5.hasKnownExtents = false;
|
matthiasm@0
|
818 d5.isQuantized = false;
|
matthiasm@0
|
819 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
820 d5.hasDuration = false;
|
matthiasm@0
|
821 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
822 list.push_back(d5);
|
matthiasm@0
|
823
|
matthiasm@0
|
824 OutputDescriptor d6;
|
matthiasm@0
|
825 d6.identifier = "bothchroma";
|
matthiasm@0
|
826 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
827 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
828 d6.unit = "";
|
matthiasm@0
|
829 d6.hasFixedBinCount = true;
|
matthiasm@0
|
830 d6.binCount = 24;
|
matthiasm@0
|
831 d6.binNames = bothchromanames;
|
matthiasm@0
|
832 d6.hasKnownExtents = false;
|
matthiasm@0
|
833 d6.isQuantized = false;
|
matthiasm@0
|
834 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
835 d6.hasDuration = false;
|
matthiasm@0
|
836 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
837 list.push_back(d6);
|
matthiasm@0
|
838
|
matthiasm@0
|
839 OutputDescriptor d7;
|
matthiasm@0
|
840 d7.identifier = "simplechord";
|
matthiasm@0
|
841 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
842 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
843 d7.unit = "";
|
matthiasm@0
|
844 d7.hasFixedBinCount = true;
|
matthiasm@0
|
845 d7.binCount = 0;
|
matthiasm@0
|
846 d7.hasKnownExtents = false;
|
matthiasm@0
|
847 d7.isQuantized = false;
|
matthiasm@0
|
848 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
849 d7.hasDuration = false;
|
matthiasm@0
|
850 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
851 list.push_back(d7);
|
matthiasm@0
|
852
|
matthiasm@1
|
853 //
|
matthiasm@1
|
854 // OutputDescriptor d9;
|
matthiasm@1
|
855 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
856 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
857 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
858 // d9.unit = "";
|
matthiasm@1
|
859 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
860 // d9.binCount = 0;
|
matthiasm@1
|
861 // d9.hasKnownExtents = true;
|
matthiasm@1
|
862 // d9.minValue = 0.1;
|
matthiasm@1
|
863 // d9.maxValue = 0.9;
|
matthiasm@1
|
864 // d9.isQuantized = false;
|
matthiasm@1
|
865 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
866 // d9.hasDuration = false;
|
matthiasm@1
|
867 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
868 // list.push_back(d9);
|
matthiasm@1
|
869 //
|
matthiasm@1
|
870 OutputDescriptor d10;
|
matthiasm@17
|
871 d10.identifier = "localtuning";
|
matthiasm@17
|
872 d10.name = "Local tuning";
|
matthiasm@17
|
873 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@17
|
874 d10.unit = "Hz";
|
matthiasm@17
|
875 d10.hasFixedBinCount = true;
|
matthiasm@17
|
876 d10.binCount = 1;
|
matthiasm@17
|
877 d10.hasKnownExtents = true;
|
matthiasm@17
|
878 d10.minValue = 427.47;
|
matthiasm@17
|
879 d10.maxValue = 452.89;
|
matthiasm@17
|
880 d10.isQuantized = false;
|
matthiasm@17
|
881 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
882 d10.hasDuration = false;
|
matthiasm@17
|
883 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
884 list.push_back(d10);
|
matthiasm@17
|
885
|
matthiasm@17
|
886 OutputDescriptor d8;
|
matthiasm@17
|
887 d8.identifier = "harmonicchange";
|
matthiasm@17
|
888 d8.name = "Harmonic change value";
|
matthiasm@17
|
889 d8.description = "Harmonic change.";
|
matthiasm@17
|
890 d8.unit = "";
|
matthiasm@17
|
891 d8.hasFixedBinCount = true;
|
matthiasm@17
|
892 d8.binCount = 1;
|
matthiasm@17
|
893 d8.hasKnownExtents = true;
|
matthiasm@17
|
894 d8.minValue = 0.0;
|
matthiasm@17
|
895 d8.maxValue = 0.999;
|
matthiasm@17
|
896 d8.isQuantized = false;
|
matthiasm@17
|
897 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
898 d8.hasDuration = false;
|
matthiasm@17
|
899 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
900 list.push_back(d8);
|
matthiasm@1
|
901
|
matthiasm@0
|
902 return list;
|
matthiasm@0
|
903 }
|
matthiasm@0
|
904
|
matthiasm@0
|
905
|
matthiasm@0
|
906 bool
|
matthiasm@0
|
907 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
908 {
|
matthiasm@1
|
909 if (debug_on) {
|
matthiasm@1
|
910 cerr << "--> initialise";
|
matthiasm@1
|
911 }
|
matthiasm@1
|
912
|
matthiasm@0
|
913 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
914 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
915 m_blockSize = blockSize;
|
matthiasm@0
|
916 m_stepSize = stepSize;
|
matthiasm@0
|
917 frameCount = 0;
|
matthiasm@0
|
918 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
919 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
920 float *tempkernel;
|
matthiasm@1
|
921
|
matthiasm@1
|
922 tempkernel = new float[tempn];
|
matthiasm@1
|
923
|
matthiasm@0
|
924 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
925 m_kernelValue.clear();
|
matthiasm@1
|
926 m_kernelFftIndex.clear();
|
matthiasm@1
|
927 m_kernelNoteIndex.clear();
|
matthiasm@1
|
928 int countNonzero = 0;
|
matthiasm@0
|
929 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
930 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
931 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
932 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
933 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
934 countNonzero++;
|
matthiasm@0
|
935 }
|
matthiasm@1
|
936 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
937 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
938 }
|
matthiasm@0
|
939 }
|
matthiasm@1
|
940 }
|
matthiasm@4
|
941 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
942 delete [] tempkernel;
|
matthiasm@3
|
943 ofstream myfile;
|
matthiasm@3
|
944 myfile.open ("matrix.txt");
|
matthiasm@3
|
945 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
946 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
947 myfile << m_dict[i] << endl;
|
matthiasm@3
|
948 }
|
matthiasm@3
|
949 myfile.close();
|
matthiasm@0
|
950 return true;
|
matthiasm@0
|
951 }
|
matthiasm@0
|
952
|
matthiasm@0
|
953 void
|
matthiasm@0
|
954 NNLSChroma::reset()
|
matthiasm@0
|
955 {
|
matthiasm@4
|
956 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
957
|
matthiasm@0
|
958 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
959 frameCount = 0;
|
matthiasm@4
|
960 m_dictID = 0;
|
matthiasm@4
|
961 m_fl.clear();
|
matthiasm@4
|
962 m_meanTuning0 = 0;
|
matthiasm@4
|
963 m_meanTuning1 = 0;
|
matthiasm@4
|
964 m_meanTuning2 = 0;
|
matthiasm@4
|
965 m_localTuning0 = 0;
|
matthiasm@4
|
966 m_localTuning1 = 0;
|
matthiasm@4
|
967 m_localTuning2 = 0;
|
matthiasm@4
|
968 m_localTuning.clear();
|
matthiasm@0
|
969 }
|
matthiasm@0
|
970
|
matthiasm@0
|
971 NNLSChroma::FeatureSet
|
matthiasm@0
|
972 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
973 {
|
matthiasm@4
|
974 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
975 frameCount++;
|
matthiasm@0
|
976 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
977
|
matthiasm@0
|
978 Feature f10; // local tuning
|
matthiasm@3
|
979 f10.hasTimestamp = true;
|
matthiasm@4
|
980 f10.timestamp = timestamp;
|
matthiasm@0
|
981 const float *fbuf = inputBuffers[0];
|
matthiasm@17
|
982 float energysum = 0;
|
matthiasm@0
|
983 // make magnitude
|
matthiasm@14
|
984 float maxmag = -10000;
|
matthiasm@0
|
985 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
986 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@14
|
987 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@14
|
988 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
matthiasm@17
|
989 if (m_rollon > 0) {
|
matthiasm@17
|
990 energysum += pow(magnitude[iBin],2);
|
matthiasm@17
|
991 }
|
matthiasm@14
|
992 }
|
matthiasm@14
|
993
|
matthiasm@17
|
994 float cumenergy = 0;
|
matthiasm@17
|
995 if (m_rollon > 0) {
|
matthiasm@17
|
996 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
matthiasm@17
|
997 cumenergy += pow(magnitude[iBin],2);
|
matthiasm@17
|
998 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
matthiasm@17
|
999 else break;
|
matthiasm@17
|
1000 }
|
matthiasm@17
|
1001 }
|
matthiasm@17
|
1002
|
matthiasm@17
|
1003 if (maxmag < 2) {
|
matthiasm@14
|
1004 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
matthiasm@14
|
1005 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@14
|
1006 magnitude[iBin] = 0;
|
matthiasm@14
|
1007 }
|
matthiasm@0
|
1008 }
|
matthiasm@4
|
1009
|
matthiasm@0
|
1010 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
1011 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
1012 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
1013 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
1014 }
|
matthiasm@0
|
1015 int binCount = 0;
|
matthiasm@0
|
1016 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
1017 // cerr << ".";
|
matthiasm@1
|
1018 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
1019 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
1020 binCount++;
|
matthiasm@0
|
1021 }
|
matthiasm@1
|
1022 // cerr << nm[20];
|
matthiasm@1
|
1023 // cerr << endl;
|
matthiasm@0
|
1024
|
matthiasm@0
|
1025
|
matthiasm@0
|
1026 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
1027 // update means of complex tuning variables
|
matthiasm@0
|
1028 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1029 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1030 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1031
|
matthiasm@0
|
1032 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
1033 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
1034 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
1035 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
1036 float ratioOld = 0.997;
|
matthiasm@3
|
1037 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
1038 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
1039 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
1040 }
|
matthiasm@0
|
1041
|
matthiasm@0
|
1042 // if (m_tuneLocal) {
|
matthiasm@0
|
1043 // local tuning
|
matthiasm@0
|
1044 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
1045 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
1046 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
1047 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
1048 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
1049 f10.values.push_back(tuning440);
|
matthiasm@3
|
1050 // cerr << tuning440 << endl;
|
matthiasm@0
|
1051 // }
|
matthiasm@0
|
1052
|
matthiasm@0
|
1053 Feature f1; // logfreqspec
|
matthiasm@0
|
1054 f1.hasTimestamp = true;
|
matthiasm@0
|
1055 f1.timestamp = timestamp;
|
matthiasm@0
|
1056 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
1057 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
1058 }
|
matthiasm@0
|
1059
|
matthiasm@0
|
1060 FeatureSet fs;
|
matthiasm@0
|
1061 fs[1].push_back(f1);
|
matthiasm@3
|
1062 fs[8].push_back(f10);
|
matthiasm@0
|
1063
|
matthiasm@0
|
1064 // deletes
|
matthiasm@0
|
1065 delete[] magnitude;
|
matthiasm@0
|
1066 delete[] nm;
|
matthiasm@0
|
1067
|
matthiasm@0
|
1068 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@7
|
1069 char * pPath;
|
matthiasm@7
|
1070 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
1071
|
matthiasm@7
|
1072
|
matthiasm@0
|
1073 return fs;
|
matthiasm@0
|
1074 }
|
matthiasm@0
|
1075
|
matthiasm@0
|
1076 NNLSChroma::FeatureSet
|
matthiasm@0
|
1077 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
1078 {
|
matthiasm@4
|
1079 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
1080 FeatureSet fsOut;
|
matthiasm@4
|
1081 if (m_fl.size() == 0) return fsOut;
|
matthiasm@9
|
1082 int nChord = m_chordnames.size();
|
matthiasm@0
|
1083 //
|
matthiasm@1
|
1084 /** Calculate Tuning
|
matthiasm@1
|
1085 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
1086 cumulative mean real and imag values)
|
matthiasm@1
|
1087 **/
|
matthiasm@1
|
1088 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
1089 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
1090 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
1091 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
1092 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
1093 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1094
|
matthiasm@1
|
1095 char buffer0 [50];
|
matthiasm@1
|
1096
|
matthiasm@1
|
1097 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1098
|
matthiasm@1
|
1099 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1100
|
matthiasm@1
|
1101 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
1102 Feature f0; // tuning
|
matthiasm@1
|
1103 f0.hasTimestamp = true;
|
matthiasm@1
|
1104 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
1105 f0.label = buffer0;
|
matthiasm@1
|
1106 fsOut[0].push_back(f0);
|
matthiasm@1
|
1107
|
matthiasm@1
|
1108 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
1109 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
1110 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@13
|
1111 **/
|
matthiasm@17
|
1112 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
1113
|
matthiasm@1
|
1114 float tempValue = 0;
|
matthiasm@1
|
1115 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
1116 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
1117 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
1118 int count = 0;
|
matthiasm@1
|
1119
|
matthiasm@1
|
1120 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
1121 Feature f1 = *i;
|
matthiasm@1
|
1122 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
1123 f2.hasTimestamp = true;
|
matthiasm@1
|
1124 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
1125 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1126
|
matthiasm@1
|
1127 if (m_tuneLocal) {
|
matthiasm@1
|
1128 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
1129 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1130 }
|
matthiasm@1
|
1131
|
matthiasm@1
|
1132 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1133
|
matthiasm@4
|
1134 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
1135 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
1136 f2.values.push_back(tempValue);
|
matthiasm@1
|
1137 }
|
matthiasm@1
|
1138
|
matthiasm@1
|
1139 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
1140 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
1141 vector<float> runningstd;
|
matthiasm@1
|
1142 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
1143 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
1144 }
|
matthiasm@1
|
1145 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
1146 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
1147 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
1148 if (runningstd[i] > 0) {
|
matthiasm@1
|
1149 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
1150 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1151 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
1152 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1153 }
|
matthiasm@1
|
1154 if (f2.values[i] < 0) {
|
matthiasm@1
|
1155 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
1156 }
|
matthiasm@1
|
1157 }
|
matthiasm@1
|
1158 fsOut[2].push_back(f2);
|
matthiasm@1
|
1159 count++;
|
matthiasm@1
|
1160 }
|
matthiasm@13
|
1161 cerr << "done." << endl;
|
matthiasm@1
|
1162
|
matthiasm@1
|
1163 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
1164 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
1165 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
1166 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
1167 bass and treble stacked onto each other).
|
matthiasm@1
|
1168 **/
|
matthiasm@13
|
1169 if (m_dictID == 1) {
|
matthiasm@13
|
1170 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
matthiasm@13
|
1171 } else {
|
matthiasm@13
|
1172 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
matthiasm@13
|
1173 }
|
matthiasm@13
|
1174
|
matthiasm@1
|
1175
|
matthiasm@1
|
1176 vector<vector<float> > chordogram;
|
matthiasm@3
|
1177 vector<vector<int> > scoreChordogram;
|
matthiasm@17
|
1178 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
matthiasm@1
|
1179 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
1180 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
1181 count = 0;
|
matthiasm@9
|
1182
|
matthiasm@1
|
1183 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
1184 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
1185 Feature f3; // semitone spectrum
|
matthiasm@1
|
1186 Feature f4; // treble chromagram
|
matthiasm@1
|
1187 Feature f5; // bass chromagram
|
matthiasm@1
|
1188 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1189
|
matthiasm@1
|
1190 f3.hasTimestamp = true;
|
matthiasm@1
|
1191 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1192
|
matthiasm@1
|
1193 f4.hasTimestamp = true;
|
matthiasm@1
|
1194 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1195
|
matthiasm@1
|
1196 f5.hasTimestamp = true;
|
matthiasm@1
|
1197 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1198
|
matthiasm@1
|
1199 f6.hasTimestamp = true;
|
matthiasm@1
|
1200 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1201
|
matthiasm@3
|
1202 float b[256];
|
matthiasm@1
|
1203
|
matthiasm@1
|
1204 bool some_b_greater_zero = false;
|
matthiasm@3
|
1205 float sumb = 0;
|
matthiasm@1
|
1206 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
1207 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
1208 b[i] = f2.values[i];
|
matthiasm@3
|
1209 sumb += b[i];
|
matthiasm@1
|
1210 if (b[i] > 0) {
|
matthiasm@1
|
1211 some_b_greater_zero = true;
|
matthiasm@1
|
1212 }
|
matthiasm@1
|
1213 }
|
matthiasm@1
|
1214
|
matthiasm@1
|
1215 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1216
|
matthiasm@1
|
1217 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
1218 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
1219 float currval;
|
matthiasm@1
|
1220 unsigned iSemitone = 0;
|
matthiasm@1
|
1221
|
matthiasm@1
|
1222 if (some_b_greater_zero) {
|
matthiasm@3
|
1223 if (m_dictID == 1) {
|
matthiasm@1
|
1224 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
1225 currval = 0;
|
matthiasm@3
|
1226 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
1227 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
1228 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
1229 f3.values.push_back(currval);
|
matthiasm@1
|
1230 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
1231 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
1232 iSemitone++;
|
matthiasm@1
|
1233 }
|
matthiasm@1
|
1234
|
matthiasm@1
|
1235 } else {
|
matthiasm@3
|
1236 float x[84+1000];
|
matthiasm@3
|
1237 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@10
|
1238 vector<int> signifIndex;
|
matthiasm@10
|
1239 int index=0;
|
matthiasm@10
|
1240 sumb /= 84.0;
|
matthiasm@10
|
1241 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@10
|
1242 float currval = 0;
|
matthiasm@10
|
1243 currval += b[iNote + 1 + -1];
|
matthiasm@10
|
1244 currval += b[iNote + 1 + 0];
|
matthiasm@10
|
1245 currval += b[iNote + 1 + 1];
|
matthiasm@10
|
1246 if (currval > 0) signifIndex.push_back(index);
|
matthiasm@10
|
1247 f3.values.push_back(0); // fill the values, change later
|
matthiasm@10
|
1248 index++;
|
matthiasm@10
|
1249 }
|
matthiasm@3
|
1250 float rnorm;
|
matthiasm@3
|
1251 float w[84+1000];
|
matthiasm@3
|
1252 float zz[84+1000];
|
matthiasm@3
|
1253 int indx[84+1000];
|
matthiasm@1
|
1254 int mode;
|
matthiasm@10
|
1255 int dictsize = 256*signifIndex.size();
|
matthiasm@10
|
1256 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
matthiasm@10
|
1257 float *curr_dict = new float[dictsize];
|
matthiasm@10
|
1258 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1259 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
matthiasm@10
|
1260 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
matthiasm@10
|
1261 }
|
matthiasm@3
|
1262 }
|
matthiasm@10
|
1263 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@10
|
1264 delete [] curr_dict;
|
matthiasm@10
|
1265 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1266 f3.values[signifIndex[iNote]] = x[iNote];
|
matthiasm@3
|
1267 // cerr << mode << endl;
|
matthiasm@10
|
1268 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
matthiasm@10
|
1269 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
matthiasm@3
|
1270 }
|
matthiasm@1
|
1271 }
|
matthiasm@1
|
1272 }
|
matthiasm@13
|
1273
|
matthiasm@10
|
1274
|
matthiasm@12
|
1275
|
matthiasm@13
|
1276
|
matthiasm@12
|
1277 f4.values = chroma;
|
matthiasm@1
|
1278 f5.values = basschroma;
|
matthiasm@1
|
1279 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
1280 f6.values = chroma;
|
matthiasm@1
|
1281
|
matthiasm@13
|
1282 if (m_doNormalizeChroma > 0) {
|
matthiasm@13
|
1283 vector<float> chromanorm = vector<float>(3,0);
|
matthiasm@13
|
1284 switch (int(m_doNormalizeChroma)) {
|
matthiasm@13
|
1285 case 0: // should never end up here
|
matthiasm@13
|
1286 break;
|
matthiasm@13
|
1287 case 1:
|
matthiasm@13
|
1288 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
matthiasm@13
|
1289 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
matthiasm@13
|
1290 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
matthiasm@13
|
1291 break;
|
matthiasm@13
|
1292 case 2:
|
matthiasm@13
|
1293 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
matthiasm@13
|
1294 chromanorm[0] += *it;
|
matthiasm@13
|
1295 }
|
matthiasm@13
|
1296 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
matthiasm@13
|
1297 chromanorm[1] += *it;
|
matthiasm@13
|
1298 }
|
matthiasm@13
|
1299 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
matthiasm@13
|
1300 chromanorm[2] += *it;
|
matthiasm@13
|
1301 }
|
matthiasm@13
|
1302 break;
|
matthiasm@13
|
1303 case 3:
|
matthiasm@13
|
1304 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
matthiasm@13
|
1305 chromanorm[0] += pow(*it,2);
|
matthiasm@13
|
1306 }
|
matthiasm@13
|
1307 chromanorm[0] = sqrt(chromanorm[0]);
|
matthiasm@13
|
1308 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
matthiasm@13
|
1309 chromanorm[1] += pow(*it,2);
|
matthiasm@13
|
1310 }
|
matthiasm@13
|
1311 chromanorm[1] = sqrt(chromanorm[1]);
|
matthiasm@13
|
1312 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
matthiasm@13
|
1313 chromanorm[2] += pow(*it,2);
|
matthiasm@13
|
1314 }
|
matthiasm@13
|
1315 chromanorm[2] = sqrt(chromanorm[2]);
|
matthiasm@13
|
1316 break;
|
matthiasm@13
|
1317 }
|
matthiasm@13
|
1318 if (chromanorm[0] > 0) {
|
matthiasm@13
|
1319 for (int i = 0; i < f4.values.size(); i++) {
|
matthiasm@13
|
1320 f4.values[i] /= chromanorm[0];
|
matthiasm@13
|
1321 }
|
matthiasm@13
|
1322 }
|
matthiasm@13
|
1323 if (chromanorm[1] > 0) {
|
matthiasm@13
|
1324 for (int i = 0; i < f5.values.size(); i++) {
|
matthiasm@13
|
1325 f5.values[i] /= chromanorm[1];
|
matthiasm@13
|
1326 }
|
matthiasm@13
|
1327 }
|
matthiasm@13
|
1328 if (chromanorm[2] > 0) {
|
matthiasm@13
|
1329 for (int i = 0; i < f6.values.size(); i++) {
|
matthiasm@13
|
1330 f6.values[i] /= chromanorm[2];
|
matthiasm@13
|
1331 }
|
matthiasm@13
|
1332 }
|
matthiasm@13
|
1333
|
matthiasm@13
|
1334 }
|
matthiasm@13
|
1335
|
matthiasm@1
|
1336 // local chord estimation
|
matthiasm@1
|
1337 vector<float> currentChordSalience;
|
matthiasm@1
|
1338 float tempchordvalue = 0;
|
matthiasm@1
|
1339 float sumchordvalue = 0;
|
matthiasm@9
|
1340
|
matthiasm@1
|
1341 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1342 tempchordvalue = 0;
|
matthiasm@1
|
1343 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@9
|
1344 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1345 }
|
matthiasm@1
|
1346 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@9
|
1347 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1348 }
|
matthiasm@1
|
1349 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1350 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1351 }
|
matthiasm@17
|
1352 if (sumchordvalue > 0) {
|
matthiasm@17
|
1353 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@17
|
1354 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@17
|
1355 }
|
matthiasm@17
|
1356 } else {
|
matthiasm@17
|
1357 currentChordSalience[nChord-1] = 1.0;
|
matthiasm@17
|
1358 }
|
matthiasm@1
|
1359 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1360
|
matthiasm@1
|
1361 fsOut[3].push_back(f3);
|
matthiasm@1
|
1362 fsOut[4].push_back(f4);
|
matthiasm@1
|
1363 fsOut[5].push_back(f5);
|
matthiasm@1
|
1364 fsOut[6].push_back(f6);
|
matthiasm@1
|
1365 count++;
|
matthiasm@1
|
1366 }
|
matthiasm@13
|
1367 cerr << "done." << endl;
|
matthiasm@13
|
1368
|
matthiasm@10
|
1369
|
matthiasm@3
|
1370 /* Simple chord estimation
|
matthiasm@3
|
1371 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1372 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1373 */
|
matthiasm@13
|
1374 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
matthiasm@3
|
1375 count = 0;
|
matthiasm@3
|
1376 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1377 vector<int> chordSequence;
|
matthiasm@3
|
1378 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1379 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1380 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1381 }
|
matthiasm@4
|
1382 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1383 int startIndex = count + 1;
|
matthiasm@3
|
1384 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
1385
|
matthiasm@10
|
1386 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
1387
|
matthiasm@10
|
1388 vector<int> chordCandidates;
|
matthiasm@10
|
1389 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@10
|
1390 // float currsum = 0;
|
matthiasm@10
|
1391 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1392 // currsum += chordogram[iFrame][iChord];
|
matthiasm@10
|
1393 // }
|
matthiasm@10
|
1394 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@10
|
1395 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1396 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@10
|
1397 chordCandidates.push_back(iChord);
|
matthiasm@10
|
1398 break;
|
matthiasm@10
|
1399 }
|
matthiasm@10
|
1400 }
|
matthiasm@10
|
1401 }
|
matthiasm@10
|
1402 chordCandidates.push_back(nChord-1);
|
matthiasm@10
|
1403 // cerr << chordCandidates.size() << endl;
|
matthiasm@10
|
1404
|
matthiasm@10
|
1405 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@4
|
1406 float maxindex = 0; //... and the index thereof
|
matthiasm@10
|
1407 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@10
|
1408 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@10
|
1409
|
matthiasm@4
|
1410 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1411 // now find the max values on both sides of iWF
|
matthiasm@3
|
1412 // left side:
|
matthiasm@3
|
1413 float maxL = 0;
|
matthiasm@3
|
1414 unsigned maxindL = nChord-1;
|
matthiasm@10
|
1415 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1416 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1417 float currsum = 0;
|
matthiasm@3
|
1418 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1419 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1420 }
|
matthiasm@3
|
1421 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1422 if (currsum > maxL) {
|
matthiasm@3
|
1423 maxL = currsum;
|
matthiasm@3
|
1424 maxindL = iChord;
|
matthiasm@3
|
1425 }
|
matthiasm@3
|
1426 }
|
matthiasm@3
|
1427 // right side:
|
matthiasm@3
|
1428 float maxR = 0;
|
matthiasm@3
|
1429 unsigned maxindR = nChord-1;
|
matthiasm@10
|
1430 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1431 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1432 float currsum = 0;
|
matthiasm@3
|
1433 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1434 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1435 }
|
matthiasm@3
|
1436 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1437 if (currsum > maxR) {
|
matthiasm@3
|
1438 maxR = currsum;
|
matthiasm@3
|
1439 maxindR = iChord;
|
matthiasm@3
|
1440 }
|
matthiasm@3
|
1441 }
|
matthiasm@3
|
1442 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1443 maxval = maxL+maxR;
|
matthiasm@3
|
1444 maxindex = iWF;
|
matthiasm@3
|
1445 bestchordL = maxindL;
|
matthiasm@3
|
1446 bestchordR = maxindR;
|
matthiasm@3
|
1447 }
|
matthiasm@3
|
1448
|
matthiasm@3
|
1449 }
|
matthiasm@3
|
1450 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1451 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1452 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1453 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1454 }
|
matthiasm@3
|
1455 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1456 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1457 }
|
matthiasm@17
|
1458 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@3
|
1459 count++;
|
matthiasm@3
|
1460 }
|
matthiasm@13
|
1461 // cerr << "******* agent finished *******" << endl;
|
matthiasm@3
|
1462 count = 0;
|
matthiasm@3
|
1463 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1464 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1465 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1466 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1467 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1468 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1469 maxindex = iChord;
|
matthiasm@4
|
1470 // cerr << iChord << endl;
|
matthiasm@3
|
1471 }
|
matthiasm@3
|
1472 }
|
matthiasm@3
|
1473 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1474 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1475 count++;
|
matthiasm@3
|
1476 }
|
matthiasm@13
|
1477 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
1478
|
matthiasm@3
|
1479
|
matthiasm@3
|
1480 // mode filter on chordSequence
|
matthiasm@3
|
1481 count = 0;
|
matthiasm@12
|
1482 string oldChord = "";
|
matthiasm@3
|
1483 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1484 Feature f6 = *it;
|
matthiasm@3
|
1485 Feature f7; // chord estimate
|
matthiasm@3
|
1486 f7.hasTimestamp = true;
|
matthiasm@3
|
1487 f7.timestamp = f6.timestamp;
|
matthiasm@17
|
1488 Feature f8; // chord estimate
|
matthiasm@17
|
1489 f8.hasTimestamp = true;
|
matthiasm@17
|
1490 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
1491
|
matthiasm@3
|
1492 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1493 int maxChordCount = 0;
|
matthiasm@3
|
1494 int maxChordIndex = nChord-1;
|
matthiasm@12
|
1495 string maxChord;
|
matthiasm@4
|
1496 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1497 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1498 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1499 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1500 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@7
|
1501 // cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1502 maxChordCount++;
|
matthiasm@4
|
1503 maxChordIndex = chordSequence[i];
|
matthiasm@12
|
1504 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@4
|
1505 }
|
matthiasm@4
|
1506 }
|
matthiasm@4
|
1507 // chordSequence[count] = maxChordIndex;
|
matthiasm@7
|
1508 // cerr << maxChordIndex << endl;
|
matthiasm@17
|
1509 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
matthiasm@17
|
1510 // cerr << chordchange[count] << endl;
|
matthiasm@17
|
1511 fsOut[9].push_back(f8);
|
matthiasm@12
|
1512 if (oldChord != maxChord) {
|
matthiasm@12
|
1513 oldChord = maxChord;
|
matthiasm@3
|
1514
|
matthiasm@9
|
1515 // char buffer1 [50];
|
matthiasm@9
|
1516 // if (maxChordIndex < nChord - 1) {
|
matthiasm@9
|
1517 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@9
|
1518 // } else {
|
matthiasm@9
|
1519 // sprintf(buffer1, "N");
|
matthiasm@9
|
1520 // }
|
matthiasm@9
|
1521 // f7.label = buffer1;
|
matthiasm@9
|
1522 f7.label = m_chordnames[maxChordIndex];
|
matthiasm@3
|
1523 fsOut[7].push_back(f7);
|
matthiasm@3
|
1524 }
|
matthiasm@3
|
1525 count++;
|
matthiasm@3
|
1526 }
|
matthiasm@17
|
1527 Feature f7; // last chord estimate
|
matthiasm@17
|
1528 f7.hasTimestamp = true;
|
matthiasm@17
|
1529 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
matthiasm@17
|
1530 f7.label = "N";
|
matthiasm@17
|
1531 fsOut[7].push_back(f7);
|
matthiasm@13
|
1532 cerr << "done." << endl;
|
matthiasm@0
|
1533 // // musicity
|
matthiasm@0
|
1534 // count = 0;
|
matthiasm@0
|
1535 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1536 // vector<float> musicityValue;
|
matthiasm@0
|
1537 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1538 // Feature f4 = *it;
|
matthiasm@0
|
1539 //
|
matthiasm@0
|
1540 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1541 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1542 // float chromasum = 0;
|
matthiasm@0
|
1543 // float diffsum = 0;
|
matthiasm@0
|
1544 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1545 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1546 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1547 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1548 // }
|
matthiasm@0
|
1549 // }
|
matthiasm@0
|
1550 // diffsum /= chromasum;
|
matthiasm@0
|
1551 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1552 // count++;
|
matthiasm@0
|
1553 // }
|
matthiasm@0
|
1554 //
|
matthiasm@0
|
1555 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1556 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1557 // musicityThreshold = 0.74;
|
matthiasm@0
|
1558 // }
|
matthiasm@0
|
1559 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1560 // musicityThreshold = 0.77;
|
matthiasm@0
|
1561 // }
|
matthiasm@0
|
1562 //
|
matthiasm@0
|
1563 // count = 0;
|
matthiasm@0
|
1564 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1565 // Feature f4 = *it;
|
matthiasm@0
|
1566 // Feature f8; // musicity
|
matthiasm@0
|
1567 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1568 //
|
matthiasm@0
|
1569 // f8.hasTimestamp = true;
|
matthiasm@0
|
1570 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1571 // f9.hasTimestamp = true;
|
matthiasm@0
|
1572 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1573 //
|
matthiasm@0
|
1574 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1575 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1576 // int musicityCount = 0;
|
matthiasm@0
|
1577 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1578 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1579 // }
|
matthiasm@0
|
1580 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1581 //
|
matthiasm@0
|
1582 // if (isSpeech) {
|
matthiasm@0
|
1583 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1584 // f9.label = "Speech";
|
matthiasm@0
|
1585 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1586 // oldlabeltype = 2;
|
matthiasm@0
|
1587 // }
|
matthiasm@0
|
1588 // } else {
|
matthiasm@0
|
1589 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1590 // f9.label = "Music";
|
matthiasm@0
|
1591 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1592 // oldlabeltype = 1;
|
matthiasm@0
|
1593 // }
|
matthiasm@0
|
1594 // }
|
matthiasm@0
|
1595 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1596 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1597 // count++;
|
matthiasm@0
|
1598 // }
|
matthiasm@0
|
1599 return fsOut;
|
matthiasm@0
|
1600
|
matthiasm@0
|
1601 }
|
matthiasm@0
|
1602
|