matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@10
|
4 // #include <omp.h>
|
matthiasm@0
|
5 #include <list>
|
matthiasm@0
|
6 #include <iostream>
|
matthiasm@3
|
7 #include <fstream>
|
matthiasm@0
|
8 #include <sstream>
|
matthiasm@0
|
9 #include <cassert>
|
matthiasm@7
|
10 #include <cstdlib>
|
matthiasm@0
|
11 #include <cstdio>
|
matthiasm@7
|
12 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
13 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
14 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
15 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
16 #include "nnls.h"
|
matthiasm@0
|
17 #include "chorddict.cpp"
|
matthiasm@9
|
18
|
matthiasm@10
|
19 // #include <omp.h>
|
matthiasm@10
|
20 // #define N 1000
|
matthiasm@10
|
21 // #define CHUNKSIZE 100
|
matthiasm@9
|
22
|
matthiasm@9
|
23
|
matthiasm@0
|
24 using namespace std;
|
matthiasm@7
|
25 using namespace boost;
|
matthiasm@0
|
26
|
matthiasm@0
|
27 const float sinvalue = 0.866025404;
|
matthiasm@0
|
28 const float cosvalue = -0.5;
|
matthiasm@0
|
29 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
|
matthiasm@0
|
30 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
|
matthiasm@0
|
31 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
|
matthiasm@0
|
32 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
|
matthiasm@0
|
33 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
|
matthiasm@7
|
34
|
matthiasm@7
|
35 const char* bassnames[12][12] ={
|
matthiasm@7
|
36 {"A","","B","C","C#","D","","E","","F#","G","G#"},
|
matthiasm@7
|
37 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"},
|
matthiasm@7
|
38 {"B","","C#","D","D#","E","","F#","","G#","A","A#"},
|
matthiasm@7
|
39 {"C","","D","Eb","E","F","","G","","A","Bb","B"},
|
matthiasm@7
|
40 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"},
|
matthiasm@7
|
41 {"D","","E","F","F#","G","","A","","B","C","C#"},
|
matthiasm@7
|
42 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"},
|
matthiasm@7
|
43 {"E","","F#","G","G#","A","","B","","C#","D","D#"},
|
matthiasm@7
|
44 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"},
|
matthiasm@7
|
45 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"},
|
matthiasm@7
|
46 {"G","","A","Bb","B","C","","D","","E","F","F#"},
|
matthiasm@7
|
47 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"}
|
matthiasm@7
|
48 };
|
matthiasm@0
|
49 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
50 const int nNote = 256;
|
matthiasm@0
|
51
|
matthiasm@0
|
52 /** Special Convolution
|
matthiasm@0
|
53 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
54 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
55 as the first (last) valid convolution bin.
|
matthiasm@0
|
56 **/
|
matthiasm@0
|
57
|
matthiasm@0
|
58 const bool debug_on = false;
|
matthiasm@0
|
59
|
matthiasm@0
|
/**
 * "Special" convolution of convolvee with an odd-length kernel.
 *
 * The result is exactly as long as convolvee. In the valid core (where the
 * kernel fully overlaps the input) it holds the ordinary convolution values;
 * the kernel.size()/2 entries at either end are padded with the first (last)
 * valid convolution value.
 *
 * @param convolvee  signal to be smoothed
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() floats; all zeros when the input is
 *         shorter than the kernel (there is no valid core in that case)
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Size the output from the input rather than a fixed 256 entries, so
    // inputs longer than 256 no longer write out of bounds.
    std::vector<float> Z(convolvee.size(), 0);

    // Without at least one full overlap there is no valid value to compute
    // or to copy into the pads (the pad indices would go negative).
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: Z is centred on the kernel, hence the halfKernel shift
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n-m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
87
|
matthiasm@0
|
88 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
89 // {
|
matthiasm@0
|
90 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
91 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
92 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
93 // }
|
matthiasm@0
|
94 // return freq;
|
matthiasm@0
|
95 // }
|
matthiasm@0
|
96
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * Returns 0.5 + 0.5 * cos(2*pi*(x - centre)/width) for |x - centre| <= width/2
 * and 0 outside that interval, i.e. 1 at the centre falling to 0 at the edges.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   full width of the pulse
 */
float cospuls(float x, float centre, float width)
{
    const float recipwidth = 1.0/width;
    // std::fabs keeps the comparison in floating point; an unqualified abs()
    // can resolve to the integer version and truncate the distance.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*M_PI*recipwidth)*.5+.5;
    }
    return 0.0;
}
|
matthiasm@0
|
105
|
matthiasm@0
|
106 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
107 {
|
matthiasm@0
|
108 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
109 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
110 // now scale to correct for note density
|
matthiasm@0
|
111 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
112 if (x > 0) {
|
matthiasm@0
|
113 out = out / (c * x);
|
matthiasm@0
|
114 } else {
|
matthiasm@0
|
115 out = 0;
|
matthiasm@0
|
116 }
|
matthiasm@0
|
117 return out;
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
121
|
matthiasm@0
|
122 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
123 int minoctave = 0; // this must be 0
|
matthiasm@0
|
124 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
125 int oversampling = 80;
|
matthiasm@0
|
126
|
matthiasm@0
|
127 // linear frequency vector
|
matthiasm@0
|
128 vector<float> fft_f;
|
matthiasm@0
|
129 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
130 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
131 }
|
matthiasm@0
|
132 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
133
|
matthiasm@0
|
134 // linear oversampled frequency vector
|
matthiasm@0
|
135 vector<float> oversampled_f;
|
matthiasm@0
|
136 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
137 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
138 }
|
matthiasm@0
|
139
|
matthiasm@0
|
140 // pitch-spaced frequency vector
|
matthiasm@0
|
141 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
142 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
143 vector<float> cq_f;
|
matthiasm@0
|
144 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
146 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
147 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
148 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
149 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
150 }
|
matthiasm@0
|
151 }
|
matthiasm@0
|
152 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
153 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
154
|
matthiasm@0
|
155 int nFFT = fft_f.size();
|
matthiasm@0
|
156
|
matthiasm@0
|
157 vector<float> fft_activation;
|
matthiasm@0
|
158 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
159 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
160 fft_activation.push_back(cosp);
|
matthiasm@0
|
161 // cerr << cosp << endl;
|
matthiasm@0
|
162 }
|
matthiasm@0
|
163
|
matthiasm@0
|
164 float cq_activation;
|
matthiasm@0
|
165 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
166 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
167 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
168 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
169 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
170 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
171 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
172 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
173 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
174 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
175 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
176 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
177 }
|
matthiasm@0
|
178 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
179 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
180 // }
|
matthiasm@0
|
181 }
|
matthiasm@0
|
182 }
|
matthiasm@0
|
183 }
|
matthiasm@0
|
184 return true;
|
matthiasm@0
|
185 }
|
matthiasm@0
|
186
|
matthiasm@3
|
187 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
188 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
189 int minoctave = 0; // this must be 0
|
matthiasm@1
|
190 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
191 float s_param = 0.7;
|
matthiasm@1
|
192
|
matthiasm@1
|
193 // pitch-spaced frequency vector
|
matthiasm@1
|
194 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
195 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
196 vector<float> cq_f;
|
matthiasm@1
|
197 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
199 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
200 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
201 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
202 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
203 }
|
matthiasm@1
|
204 }
|
matthiasm@1
|
205 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
206 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
207
|
matthiasm@1
|
208 float curr_f;
|
matthiasm@1
|
209 float floatbin;
|
matthiasm@1
|
210 float curr_amp;
|
matthiasm@1
|
211 // now for every combination calculate the matrix element
|
matthiasm@1
|
212 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
213 // cerr << iOut << endl;
|
matthiasm@1
|
214 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
215 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
216 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
217 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
218 // cerr << floatbin << endl;
|
matthiasm@1
|
219 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
220 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
221 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
222 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
223 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
224 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
225 }
|
matthiasm@1
|
226 }
|
matthiasm@3
|
227 }
|
matthiasm@1
|
228 }
|
matthiasm@3
|
229
|
matthiasm@3
|
230
|
matthiasm@1
|
231 }
|
matthiasm@1
|
232
|
matthiasm@7
|
/**
 * Look up the environment variable named key.
 *
 * @return its value, or an empty string when the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * const raw = std::getenv(key.c_str());
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
|
matthiasm@7
|
242
|
matthiasm@7
|
243
|
matthiasm@9
|
244 vector<string> chordDictionary(vector<float> *mchorddict) {
|
matthiasm@7
|
245 // ifstream chordDictFile;
|
matthiasm@7
|
246 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
matthiasm@7
|
247 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
matthiasm@7
|
248 typedef tokenizer<char_separator<char> > Tok;
|
matthiasm@7
|
249 // char_separator<char> sep; // default constructed
|
matthiasm@7
|
250 char_separator<char> sep(",; ",":");
|
matthiasm@7
|
251 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
252 string line;
|
matthiasm@7
|
253 int iElement = 0;
|
matthiasm@7
|
254 int nChord = 0;
|
matthiasm@7
|
255
|
matthiasm@7
|
256 vector<string> loadedChordNames;
|
matthiasm@7
|
257 vector<float> loadedChordDict;
|
matthiasm@7
|
258 if (chordDictFile.is_open()) {
|
matthiasm@7
|
259 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
matthiasm@7
|
260 // first, get the chord definition
|
matthiasm@7
|
261 string chordType;
|
matthiasm@7
|
262 vector<float> tempPCVector;
|
matthiasm@7
|
263 // cerr << line << endl;
|
matthiasm@7
|
264 if (!line.empty() && line.substr(0,1) != "#") {
|
matthiasm@7
|
265 Tok tok(line, sep);
|
matthiasm@7
|
266 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
matthiasm@7
|
267 string tempString = *tok_iter;
|
matthiasm@7
|
268 // cerr << tempString << endl;
|
matthiasm@7
|
269 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
matthiasm@7
|
270 if (tempString == ":") {
|
matthiasm@7
|
271 chordType = "";
|
matthiasm@7
|
272 } else {
|
matthiasm@7
|
273 chordType = tempString;
|
matthiasm@7
|
274 tok_iter++; // is this cheating ? :)
|
matthiasm@7
|
275 }
|
matthiasm@7
|
276 } else {
|
matthiasm@7
|
277 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
matthiasm@7
|
278 }
|
matthiasm@7
|
279 }
|
matthiasm@7
|
280
|
matthiasm@7
|
281 // now make all 12 chords of every type
|
matthiasm@7
|
282 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
matthiasm@7
|
283 // add bass slash notation
|
matthiasm@7
|
284 string slashNotation = "";
|
matthiasm@7
|
285 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
matthiasm@7
|
286 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
matthiasm@7
|
287 slashNotation = bassnames[iSemitone][kSemitone];
|
matthiasm@7
|
288 }
|
matthiasm@7
|
289 }
|
matthiasm@7
|
290 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
matthiasm@9
|
291 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
matthiasm@9
|
292 float bassValue = 0;
|
matthiasm@9
|
293 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
|
matthiasm@9
|
294 bassValue = 1;
|
matthiasm@9
|
295 } else {
|
matthiasm@10
|
296 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
|
matthiasm@9
|
297 }
|
matthiasm@9
|
298 loadedChordDict.push_back(bassValue);
|
matthiasm@7
|
299 }
|
matthiasm@7
|
300 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
matthiasm@7
|
301 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
302 }
|
matthiasm@7
|
303 ostringstream os;
|
matthiasm@7
|
304 if (slashNotation.empty()) {
|
matthiasm@7
|
305 os << notenames[12+iSemitone] << chordType;
|
matthiasm@7
|
306 } else {
|
matthiasm@7
|
307 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
matthiasm@7
|
308 }
|
matthiasm@7
|
309
|
matthiasm@7
|
310 loadedChordNames.push_back(os.str());
|
matthiasm@7
|
311 }
|
matthiasm@7
|
312 }
|
matthiasm@7
|
313 }
|
matthiasm@7
|
314 // N type
|
matthiasm@7
|
315 loadedChordNames.push_back("N");
|
matthiasm@7
|
316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
matthiasm@7
|
317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
318
|
matthiasm@7
|
319 // normalise
|
matthiasm@7
|
320 float sum = 0;
|
matthiasm@7
|
321 for (int i = 0; i < loadedChordDict.size(); i++) {
|
matthiasm@7
|
322 sum += pow(loadedChordDict[i],2);
|
matthiasm@7
|
323 if (i % 24 == 23) {
|
matthiasm@7
|
324 float invertedsum = 1.0/sqrt(sum);
|
matthiasm@7
|
325 for (int k = 0; k < 24; k++) {
|
matthiasm@7
|
326 loadedChordDict[i-k] *= invertedsum;
|
matthiasm@7
|
327 }
|
matthiasm@7
|
328 sum = 0;
|
matthiasm@7
|
329 }
|
matthiasm@7
|
330
|
matthiasm@7
|
331 }
|
matthiasm@7
|
332
|
matthiasm@7
|
333
|
matthiasm@7
|
334 nChord = 0;
|
matthiasm@7
|
335 for (int i = 0; i < loadedChordNames.size(); i++) {
|
matthiasm@7
|
336 nChord++;
|
matthiasm@7
|
337 }
|
matthiasm@7
|
338 chordDictFile.close();
|
matthiasm@7
|
339
|
matthiasm@7
|
340
|
matthiasm@9
|
341 // mchorddict = new float[nChord*24];
|
matthiasm@7
|
342 for (int i = 0; i < nChord*24; i++) {
|
matthiasm@9
|
343 mchorddict->push_back(loadedChordDict[i]);
|
matthiasm@7
|
344 }
|
matthiasm@9
|
345
|
matthiasm@7
|
346 } else {// use default from chorddict.cpp
|
matthiasm@9
|
347 // mchorddict = new float[nChorddict];
|
matthiasm@7
|
348 for (int i = 0; i < nChorddict; i++) {
|
matthiasm@9
|
349 mchorddict->push_back(chorddict[i]);
|
matthiasm@7
|
350 }
|
matthiasm@7
|
351
|
matthiasm@7
|
352 nChord = nChorddict/24;
|
matthiasm@7
|
353 // mchordnames = new string[nChorddict/24];
|
matthiasm@7
|
354 char buffer1 [50];
|
matthiasm@7
|
355 for (int i = 0; i < nChorddict/24; i++) {
|
matthiasm@7
|
356 if (i < nChorddict/24 - 1) {
|
matthiasm@7
|
357 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
matthiasm@7
|
358 } else {
|
matthiasm@7
|
359 sprintf(buffer1, "N");
|
matthiasm@7
|
360 }
|
matthiasm@7
|
361 ostringstream os;
|
matthiasm@7
|
362 os << buffer1;
|
matthiasm@9
|
363 loadedChordNames.push_back(os.str());
|
matthiasm@9
|
364
|
matthiasm@7
|
365 }
|
matthiasm@7
|
366
|
matthiasm@7
|
367 }
|
matthiasm@9
|
368 // cerr << "before leaving" << chordnames[1] << endl;
|
matthiasm@9
|
369 return loadedChordNames;
|
matthiasm@7
|
370 }
|
matthiasm@0
|
371
|
matthiasm@0
|
372 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
373 Plugin(inputSampleRate),
|
matthiasm@0
|
374 m_fl(0),
|
matthiasm@0
|
375 m_blockSize(0),
|
matthiasm@0
|
376 m_stepSize(0),
|
matthiasm@0
|
377 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
378 m_meanTuning0(0),
|
matthiasm@0
|
379 m_meanTuning1(0),
|
matthiasm@0
|
380 m_meanTuning2(0),
|
matthiasm@0
|
381 m_localTuning0(0),
|
matthiasm@0
|
382 m_localTuning1(0),
|
matthiasm@0
|
383 m_localTuning2(0),
|
matthiasm@4
|
384 m_paling(1.0),
|
matthiasm@3
|
385 m_preset(0.0),
|
matthiasm@0
|
386 m_localTuning(0),
|
matthiasm@0
|
387 m_kernelValue(0),
|
matthiasm@0
|
388 m_kernelFftIndex(0),
|
matthiasm@0
|
389 m_kernelNoteIndex(0),
|
matthiasm@1
|
390 m_dict(0),
|
matthiasm@0
|
391 m_tuneLocal(false),
|
matthiasm@7
|
392 m_dictID(0),
|
matthiasm@7
|
393 m_chorddict(0),
|
matthiasm@12
|
394 m_chordnames(0),
|
matthiasm@12
|
395 m_doNormalizeChroma(0)
|
matthiasm@0
|
396 {
|
matthiasm@0
|
397 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
398
|
matthiasm@7
|
399 // make the *note* dictionary matrix
|
matthiasm@3
|
400 m_dict = new float[nNote * 84];
|
matthiasm@3
|
401 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
402 dictionaryMatrix(m_dict);
|
matthiasm@7
|
403
|
matthiasm@7
|
404 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@9
|
405 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
406 }
|
matthiasm@0
|
407
|
matthiasm@0
|
408
|
matthiasm@0
|
409 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
410 {
|
matthiasm@0
|
411 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
412 delete [] m_dict;
|
matthiasm@9
|
413 // delete [] m_chorddict;
|
matthiasm@7
|
414 // delete m_chordnames;
|
matthiasm@0
|
415 }
|
matthiasm@0
|
416
|
matthiasm@0
|
417 string
|
matthiasm@0
|
418 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
419 {
|
matthiasm@0
|
420 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
421 return "nnls_chroma";
|
matthiasm@0
|
422 }
|
matthiasm@0
|
423
|
matthiasm@0
|
424 string
|
matthiasm@0
|
425 NNLSChroma::getName() const
|
matthiasm@0
|
426 {
|
matthiasm@0
|
427 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
428 return "NNLS Chroma";
|
matthiasm@0
|
429 }
|
matthiasm@0
|
430
|
matthiasm@0
|
431 string
|
matthiasm@0
|
432 NNLSChroma::getDescription() const
|
matthiasm@0
|
433 {
|
matthiasm@0
|
434 // Return something helpful here!
|
matthiasm@0
|
435 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@13
|
436 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
|
matthiasm@0
|
437 }
|
matthiasm@0
|
438
|
matthiasm@0
|
439 string
|
matthiasm@0
|
440 NNLSChroma::getMaker() const
|
matthiasm@0
|
441 {
|
matthiasm@0
|
442 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
443 // Your name here
|
matthiasm@0
|
444 return "Matthias Mauch";
|
matthiasm@0
|
445 }
|
matthiasm@0
|
446
|
matthiasm@0
|
447 int
|
matthiasm@0
|
448 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
449 {
|
matthiasm@0
|
450 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
451 // Increment this each time you release a version that behaves
|
matthiasm@0
|
452 // differently from the previous one
|
matthiasm@0
|
453 return 1;
|
matthiasm@0
|
454 }
|
matthiasm@0
|
455
|
matthiasm@0
|
456 string
|
matthiasm@0
|
457 NNLSChroma::getCopyright() const
|
matthiasm@0
|
458 {
|
matthiasm@0
|
459 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
460 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
461 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
462 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
463 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
464 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
465 }
|
matthiasm@0
|
466
|
matthiasm@0
|
467 NNLSChroma::InputDomain
|
matthiasm@0
|
468 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
469 {
|
matthiasm@0
|
470 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
471 return FrequencyDomain;
|
matthiasm@0
|
472 }
|
matthiasm@0
|
473
|
matthiasm@0
|
474 size_t
|
matthiasm@0
|
475 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
476 {
|
matthiasm@0
|
477 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
478 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
479 }
|
matthiasm@0
|
480
|
matthiasm@0
|
481 size_t
|
matthiasm@0
|
482 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
483 {
|
matthiasm@0
|
484 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
485 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
486 // means the same as the block size for TimeDomain
|
matthiasm@0
|
487 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
488 }
|
matthiasm@0
|
489
|
matthiasm@0
|
490 size_t
|
matthiasm@0
|
491 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
492 {
|
matthiasm@0
|
493 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
494 return 1;
|
matthiasm@0
|
495 }
|
matthiasm@0
|
496
|
matthiasm@0
|
497 size_t
|
matthiasm@0
|
498 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
499 {
|
matthiasm@0
|
500 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
501 return 1;
|
matthiasm@0
|
502 }
|
matthiasm@0
|
503
|
matthiasm@0
|
504 NNLSChroma::ParameterList
|
matthiasm@0
|
505 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
506 {
|
matthiasm@0
|
507 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
508 ParameterList list;
|
matthiasm@0
|
509
|
matthiasm@3
|
510 ParameterDescriptor d3;
|
matthiasm@3
|
511 d3.identifier = "preset";
|
matthiasm@3
|
512 d3.name = "preset";
|
matthiasm@3
|
513 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
514 d3.unit = "";
|
matthiasm@3
|
515 d3.isQuantized = true;
|
matthiasm@3
|
516 d3.quantizeStep = 1;
|
matthiasm@3
|
517 d3.minValue = 0.0;
|
matthiasm@4
|
518 d3.maxValue = 3.0;
|
matthiasm@3
|
519 d3.defaultValue = 0.0;
|
matthiasm@3
|
520 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
521 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
522 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
523 d3.valueNames.push_back("manual");
|
matthiasm@3
|
524 list.push_back(d3);
|
matthiasm@4
|
525
|
matthiasm@4
|
526 // ParameterDescriptor d0;
|
matthiasm@4
|
527 // d0.identifier = "notedict";
|
matthiasm@4
|
528 // d0.name = "note dictionary";
|
matthiasm@4
|
529 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
530 // d0.unit = "";
|
matthiasm@4
|
531 // d0.minValue = 0;
|
matthiasm@4
|
532 // d0.maxValue = 1;
|
matthiasm@4
|
533 // d0.defaultValue = 0;
|
matthiasm@4
|
534 // d0.isQuantized = true;
|
matthiasm@4
|
535 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
536 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
537 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
538 // list.push_back(d0);
|
matthiasm@4
|
539
|
matthiasm@4
|
540 ParameterDescriptor d1;
|
matthiasm@4
|
541 d1.identifier = "tuningmode";
|
matthiasm@4
|
542 d1.name = "tuning mode";
|
matthiasm@4
|
543 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
544 d1.unit = "";
|
matthiasm@4
|
545 d1.minValue = 0;
|
matthiasm@4
|
546 d1.maxValue = 1;
|
matthiasm@4
|
547 d1.defaultValue = 0;
|
matthiasm@4
|
548 d1.isQuantized = true;
|
matthiasm@4
|
549 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
550 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
551 d1.quantizeStep = 1.0;
|
matthiasm@4
|
552 list.push_back(d1);
|
matthiasm@4
|
553
|
matthiasm@4
|
554 // ParameterDescriptor d2;
|
matthiasm@4
|
555 // d2.identifier = "paling";
|
matthiasm@4
|
556 // d2.name = "spectral paling";
|
matthiasm@4
|
557 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
558 // d2.unit = "";
|
matthiasm@4
|
559 // d2.isQuantized = true;
|
matthiasm@4
|
560 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
561 // d2.minValue = 0.0;
|
matthiasm@4
|
562 // d2.maxValue = 1.0;
|
matthiasm@4
|
563 // d2.defaultValue = 1.0;
|
matthiasm@4
|
564 // d2.isQuantized = false;
|
matthiasm@4
|
565 // list.push_back(d2);
|
matthiasm@12
|
566 ParameterDescriptor d4;
|
matthiasm@12
|
567 d4.identifier = "chromanormalize";
|
matthiasm@12
|
568 d4.name = "chroma normalization";
|
matthiasm@12
|
569 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
570 d4.unit = "";
|
matthiasm@12
|
571 d4.minValue = 0;
|
matthiasm@13
|
572 d4.maxValue = 3;
|
matthiasm@12
|
573 d4.defaultValue = 0;
|
matthiasm@12
|
574 d4.isQuantized = true;
|
matthiasm@13
|
575 d4.valueNames.push_back("none");
|
matthiasm@13
|
576 d4.valueNames.push_back("maximum norm");
|
matthiasm@13
|
577 d4.valueNames.push_back("L1 norm");
|
matthiasm@13
|
578 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
579 d4.quantizeStep = 1.0;
|
matthiasm@12
|
580 list.push_back(d4);
|
matthiasm@4
|
581
|
matthiasm@0
|
582 return list;
|
matthiasm@0
|
583 }
|
matthiasm@0
|
584
|
matthiasm@0
|
585 float
|
matthiasm@0
|
586 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
587 {
|
matthiasm@3
|
588 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
589 if (identifier == "notedict") {
|
matthiasm@0
|
590 return m_dictID;
|
matthiasm@0
|
591 }
|
matthiasm@0
|
592
|
matthiasm@0
|
593 if (identifier == "paling") {
|
matthiasm@0
|
594 return m_paling;
|
matthiasm@0
|
595 }
|
matthiasm@0
|
596
|
matthiasm@0
|
597 if (identifier == "tuningmode") {
|
matthiasm@0
|
598 if (m_tuneLocal) {
|
matthiasm@0
|
599 return 1.0;
|
matthiasm@0
|
600 } else {
|
matthiasm@0
|
601 return 0.0;
|
matthiasm@0
|
602 }
|
matthiasm@0
|
603 }
|
matthiasm@3
|
604 if (identifier == "preset") {
|
matthiasm@3
|
605 return m_preset;
|
matthiasm@3
|
606 }
|
matthiasm@12
|
607 if (identifier == "chromanormalize") {
|
matthiasm@12
|
608 return m_doNormalizeChroma;
|
matthiasm@12
|
609 }
|
matthiasm@0
|
610 return 0;
|
matthiasm@0
|
611
|
matthiasm@0
|
612 }
|
matthiasm@0
|
613
|
matthiasm@0
|
614 void
|
matthiasm@0
|
615 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
616 {
|
matthiasm@3
|
617 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
618 if (identifier == "notedict") {
|
matthiasm@0
|
619 m_dictID = (int) value;
|
matthiasm@0
|
620 }
|
matthiasm@0
|
621
|
matthiasm@0
|
622 if (identifier == "paling") {
|
matthiasm@0
|
623 m_paling = value;
|
matthiasm@0
|
624 }
|
matthiasm@0
|
625
|
matthiasm@0
|
626 if (identifier == "tuningmode") {
|
matthiasm@0
|
627 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
628 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
629 }
|
matthiasm@3
|
630 if (identifier == "preset") {
|
matthiasm@3
|
631 m_preset = value;
|
matthiasm@3
|
632 if (m_preset == 0.0) {
|
matthiasm@3
|
633 m_tuneLocal = false;
|
matthiasm@3
|
634 m_paling = 1.0;
|
matthiasm@3
|
635 m_dictID = 0.0;
|
matthiasm@3
|
636 }
|
matthiasm@3
|
637 if (m_preset == 1.0) {
|
matthiasm@3
|
638 m_tuneLocal = false;
|
matthiasm@3
|
639 m_paling = 1.0;
|
matthiasm@3
|
640 m_dictID = 1.0;
|
matthiasm@3
|
641 }
|
matthiasm@3
|
642 if (m_preset == 2.0) {
|
matthiasm@3
|
643 m_tuneLocal = false;
|
matthiasm@3
|
644 m_paling = 0.7;
|
matthiasm@3
|
645 m_dictID = 0.0;
|
matthiasm@3
|
646 }
|
matthiasm@3
|
647 }
|
matthiasm@12
|
648 if (identifier == "chromanormalize") {
|
matthiasm@12
|
649 m_doNormalizeChroma = value;
|
matthiasm@12
|
650 }
|
matthiasm@0
|
651 }
|
matthiasm@0
|
652
|
matthiasm@0
|
653 NNLSChroma::ProgramList
|
matthiasm@0
|
654 NNLSChroma::getPrograms() const
|
matthiasm@0
|
655 {
|
matthiasm@0
|
656 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
657 ProgramList list;
|
matthiasm@0
|
658
|
matthiasm@0
|
659 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
660 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
661
|
matthiasm@0
|
662 return list;
|
matthiasm@0
|
663 }
|
matthiasm@0
|
664
|
matthiasm@0
|
665 string
|
matthiasm@0
|
666 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
667 {
|
matthiasm@0
|
668 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
669 return ""; // no programs
|
matthiasm@0
|
670 }
|
matthiasm@0
|
671
|
matthiasm@0
|
672 void
|
matthiasm@0
|
673 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
674 {
|
matthiasm@0
|
675 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
676 }
|
matthiasm@0
|
677
|
matthiasm@0
|
678
|
matthiasm@0
|
679 NNLSChroma::OutputList
|
matthiasm@0
|
680 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
681 {
|
matthiasm@0
|
682 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
683 OutputList list;
|
matthiasm@0
|
684
|
matthiasm@0
|
685 // Make chroma names for the binNames property
|
matthiasm@0
|
686 vector<string> chromanames;
|
matthiasm@0
|
687 vector<string> bothchromanames;
|
matthiasm@0
|
688 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
689 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
690 if (iNote < 12) {
|
matthiasm@0
|
691 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
692 }
|
matthiasm@0
|
693 }
|
matthiasm@0
|
694
|
matthiasm@1
|
695 // int nNote = 84;
|
matthiasm@0
|
696
|
matthiasm@0
|
697 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
698 // Every plugin must have at least one output.
|
matthiasm@0
|
699
|
matthiasm@0
|
700 OutputDescriptor d0;
|
matthiasm@0
|
701 d0.identifier = "tuning";
|
matthiasm@0
|
702 d0.name = "Tuning";
|
matthiasm@0
|
703 d0.description = "The concert pitch.";
|
matthiasm@0
|
704 d0.unit = "Hz";
|
matthiasm@0
|
705 d0.hasFixedBinCount = true;
|
matthiasm@0
|
706 d0.binCount = 0;
|
matthiasm@0
|
707 d0.hasKnownExtents = true;
|
matthiasm@0
|
708 d0.minValue = 427.47;
|
matthiasm@0
|
709 d0.maxValue = 452.89;
|
matthiasm@0
|
710 d0.isQuantized = false;
|
matthiasm@0
|
711 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
712 d0.hasDuration = false;
|
matthiasm@0
|
713 list.push_back(d0);
|
matthiasm@0
|
714
|
matthiasm@0
|
715 OutputDescriptor d1;
|
matthiasm@0
|
716 d1.identifier = "logfreqspec";
|
matthiasm@0
|
717 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
718 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
719 d1.unit = "";
|
matthiasm@0
|
720 d1.hasFixedBinCount = true;
|
matthiasm@0
|
721 d1.binCount = nNote;
|
matthiasm@0
|
722 d1.hasKnownExtents = false;
|
matthiasm@0
|
723 d1.isQuantized = false;
|
matthiasm@0
|
724 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
725 d1.hasDuration = false;
|
matthiasm@0
|
726 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
727 list.push_back(d1);
|
matthiasm@0
|
728
|
matthiasm@0
|
729 OutputDescriptor d2;
|
matthiasm@0
|
730 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
731 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
732 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
733 d2.unit = "";
|
matthiasm@0
|
734 d2.hasFixedBinCount = true;
|
matthiasm@0
|
735 d2.binCount = 256;
|
matthiasm@0
|
736 d2.hasKnownExtents = false;
|
matthiasm@0
|
737 d2.isQuantized = false;
|
matthiasm@0
|
738 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
739 d2.hasDuration = false;
|
matthiasm@0
|
740 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
741 list.push_back(d2);
|
matthiasm@0
|
742
|
matthiasm@0
|
743 OutputDescriptor d3;
|
matthiasm@0
|
744 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
745 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
746 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
747 d3.unit = "";
|
matthiasm@0
|
748 d3.hasFixedBinCount = true;
|
matthiasm@0
|
749 d3.binCount = 84;
|
matthiasm@0
|
750 d3.hasKnownExtents = false;
|
matthiasm@0
|
751 d3.isQuantized = false;
|
matthiasm@0
|
752 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
753 d3.hasDuration = false;
|
matthiasm@0
|
754 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
755 list.push_back(d3);
|
matthiasm@0
|
756
|
matthiasm@0
|
757 OutputDescriptor d4;
|
matthiasm@0
|
758 d4.identifier = "chroma";
|
matthiasm@0
|
759 d4.name = "Chromagram";
|
matthiasm@0
|
760 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
761 d4.unit = "";
|
matthiasm@0
|
762 d4.hasFixedBinCount = true;
|
matthiasm@0
|
763 d4.binCount = 12;
|
matthiasm@0
|
764 d4.binNames = chromanames;
|
matthiasm@0
|
765 d4.hasKnownExtents = false;
|
matthiasm@0
|
766 d4.isQuantized = false;
|
matthiasm@0
|
767 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
768 d4.hasDuration = false;
|
matthiasm@0
|
769 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
770 list.push_back(d4);
|
matthiasm@0
|
771
|
matthiasm@0
|
772 OutputDescriptor d5;
|
matthiasm@0
|
773 d5.identifier = "basschroma";
|
matthiasm@0
|
774 d5.name = "Bass Chromagram";
|
matthiasm@0
|
775 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
776 d5.unit = "";
|
matthiasm@0
|
777 d5.hasFixedBinCount = true;
|
matthiasm@0
|
778 d5.binCount = 12;
|
matthiasm@0
|
779 d5.binNames = chromanames;
|
matthiasm@0
|
780 d5.hasKnownExtents = false;
|
matthiasm@0
|
781 d5.isQuantized = false;
|
matthiasm@0
|
782 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
783 d5.hasDuration = false;
|
matthiasm@0
|
784 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
785 list.push_back(d5);
|
matthiasm@0
|
786
|
matthiasm@0
|
787 OutputDescriptor d6;
|
matthiasm@0
|
788 d6.identifier = "bothchroma";
|
matthiasm@0
|
789 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
790 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
791 d6.unit = "";
|
matthiasm@0
|
792 d6.hasFixedBinCount = true;
|
matthiasm@0
|
793 d6.binCount = 24;
|
matthiasm@0
|
794 d6.binNames = bothchromanames;
|
matthiasm@0
|
795 d6.hasKnownExtents = false;
|
matthiasm@0
|
796 d6.isQuantized = false;
|
matthiasm@0
|
797 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
798 d6.hasDuration = false;
|
matthiasm@0
|
799 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
800 list.push_back(d6);
|
matthiasm@0
|
801
|
matthiasm@0
|
802 OutputDescriptor d7;
|
matthiasm@0
|
803 d7.identifier = "simplechord";
|
matthiasm@0
|
804 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
805 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
806 d7.unit = "";
|
matthiasm@0
|
807 d7.hasFixedBinCount = true;
|
matthiasm@0
|
808 d7.binCount = 0;
|
matthiasm@0
|
809 d7.hasKnownExtents = false;
|
matthiasm@0
|
810 d7.isQuantized = false;
|
matthiasm@0
|
811 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
812 d7.hasDuration = false;
|
matthiasm@0
|
813 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
814 list.push_back(d7);
|
matthiasm@0
|
815
|
matthiasm@1
|
816 // OutputDescriptor d8;
|
matthiasm@1
|
817 // d8.identifier = "inconsistency";
|
matthiasm@1
|
818 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
819 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
820 // d8.unit = "";
|
matthiasm@1
|
821 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
822 // d8.binCount = 1;
|
matthiasm@1
|
823 // d8.hasKnownExtents = false;
|
matthiasm@1
|
824 // d8.isQuantized = false;
|
matthiasm@1
|
825 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
826 // d8.hasDuration = false;
|
matthiasm@1
|
827 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
828 // list.push_back(d8);
|
matthiasm@1
|
829 //
|
matthiasm@1
|
830 // OutputDescriptor d9;
|
matthiasm@1
|
831 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
832 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
833 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
834 // d9.unit = "";
|
matthiasm@1
|
835 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
836 // d9.binCount = 0;
|
matthiasm@1
|
837 // d9.hasKnownExtents = true;
|
matthiasm@1
|
838 // d9.minValue = 0.1;
|
matthiasm@1
|
839 // d9.maxValue = 0.9;
|
matthiasm@1
|
840 // d9.isQuantized = false;
|
matthiasm@1
|
841 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
842 // d9.hasDuration = false;
|
matthiasm@1
|
843 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
844 // list.push_back(d9);
|
matthiasm@1
|
845 //
|
matthiasm@1
|
846 OutputDescriptor d10;
|
matthiasm@1
|
847 d10.identifier = "localtuning";
|
matthiasm@1
|
848 d10.name = "Local tuning";
|
matthiasm@4
|
849 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@1
|
850 d10.unit = "Hz";
|
matthiasm@1
|
851 d10.hasFixedBinCount = true;
|
matthiasm@1
|
852 d10.binCount = 1;
|
matthiasm@1
|
853 d10.hasKnownExtents = true;
|
matthiasm@1
|
854 d10.minValue = 427.47;
|
matthiasm@1
|
855 d10.maxValue = 452.89;
|
matthiasm@1
|
856 d10.isQuantized = false;
|
matthiasm@3
|
857 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
858 d10.hasDuration = false;
|
matthiasm@3
|
859 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
860 list.push_back(d10);
|
matthiasm@1
|
861
|
matthiasm@0
|
862 return list;
|
matthiasm@0
|
863 }
|
matthiasm@0
|
864
|
matthiasm@0
|
865
|
matthiasm@0
|
866 bool
|
matthiasm@0
|
867 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
868 {
|
matthiasm@1
|
869 if (debug_on) {
|
matthiasm@1
|
870 cerr << "--> initialise";
|
matthiasm@1
|
871 }
|
matthiasm@1
|
872
|
matthiasm@0
|
873 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
874 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
875 m_blockSize = blockSize;
|
matthiasm@0
|
876 m_stepSize = stepSize;
|
matthiasm@0
|
877 frameCount = 0;
|
matthiasm@0
|
878 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
879 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
880 float *tempkernel;
|
matthiasm@1
|
881
|
matthiasm@1
|
882 tempkernel = new float[tempn];
|
matthiasm@1
|
883
|
matthiasm@0
|
884 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
885 m_kernelValue.clear();
|
matthiasm@1
|
886 m_kernelFftIndex.clear();
|
matthiasm@1
|
887 m_kernelNoteIndex.clear();
|
matthiasm@1
|
888 int countNonzero = 0;
|
matthiasm@0
|
889 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
890 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
891 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
892 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
893 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
894 countNonzero++;
|
matthiasm@0
|
895 }
|
matthiasm@1
|
896 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
897 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
898 }
|
matthiasm@0
|
899 }
|
matthiasm@1
|
900 }
|
matthiasm@4
|
901 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
902 delete [] tempkernel;
|
matthiasm@3
|
903 ofstream myfile;
|
matthiasm@3
|
904 myfile.open ("matrix.txt");
|
matthiasm@3
|
905 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
906 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
907 myfile << m_dict[i] << endl;
|
matthiasm@3
|
908 }
|
matthiasm@3
|
909 myfile.close();
|
matthiasm@0
|
910 return true;
|
matthiasm@0
|
911 }
|
matthiasm@0
|
912
|
matthiasm@0
|
913 void
|
matthiasm@0
|
914 NNLSChroma::reset()
|
matthiasm@0
|
915 {
|
matthiasm@4
|
916 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
917
|
matthiasm@0
|
918 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
919 frameCount = 0;
|
matthiasm@4
|
920 m_dictID = 0;
|
matthiasm@4
|
921 m_fl.clear();
|
matthiasm@4
|
922 m_meanTuning0 = 0;
|
matthiasm@4
|
923 m_meanTuning1 = 0;
|
matthiasm@4
|
924 m_meanTuning2 = 0;
|
matthiasm@4
|
925 m_localTuning0 = 0;
|
matthiasm@4
|
926 m_localTuning1 = 0;
|
matthiasm@4
|
927 m_localTuning2 = 0;
|
matthiasm@4
|
928 m_localTuning.clear();
|
matthiasm@0
|
929 }
|
matthiasm@0
|
930
|
matthiasm@0
|
931 NNLSChroma::FeatureSet
|
matthiasm@0
|
932 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
933 {
|
matthiasm@4
|
934 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
935 frameCount++;
|
matthiasm@0
|
936 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
937
|
matthiasm@0
|
938 Feature f10; // local tuning
|
matthiasm@3
|
939 f10.hasTimestamp = true;
|
matthiasm@4
|
940 f10.timestamp = timestamp;
|
matthiasm@0
|
941 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
942
|
matthiasm@0
|
943 // make magnitude
|
matthiasm@14
|
944 float maxmag = -10000;
|
matthiasm@0
|
945 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
946 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@14
|
947 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@14
|
948 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
matthiasm@14
|
949 }
|
matthiasm@14
|
950
|
matthiasm@14
|
951 if (maxmag < 12) {
|
matthiasm@14
|
952 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
matthiasm@14
|
953 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@14
|
954 magnitude[iBin] = 0;
|
matthiasm@14
|
955 }
|
matthiasm@0
|
956 }
|
matthiasm@4
|
957
|
matthiasm@0
|
958 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
959 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
960 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
961 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
962 }
|
matthiasm@0
|
963 int binCount = 0;
|
matthiasm@0
|
964 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
965 // cerr << ".";
|
matthiasm@1
|
966 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
967 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
968 binCount++;
|
matthiasm@0
|
969 }
|
matthiasm@1
|
970 // cerr << nm[20];
|
matthiasm@1
|
971 // cerr << endl;
|
matthiasm@0
|
972
|
matthiasm@0
|
973
|
matthiasm@0
|
974 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
975 // update means of complex tuning variables
|
matthiasm@0
|
976 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
977 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
978 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
979
|
matthiasm@0
|
980 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
981 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
982 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
983 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
984 float ratioOld = 0.997;
|
matthiasm@3
|
985 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
986 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
987 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
988 }
|
matthiasm@0
|
989
|
matthiasm@0
|
990 // if (m_tuneLocal) {
|
matthiasm@0
|
991 // local tuning
|
matthiasm@0
|
992 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
993 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
994 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
995 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
996 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
997 f10.values.push_back(tuning440);
|
matthiasm@3
|
998 // cerr << tuning440 << endl;
|
matthiasm@0
|
999 // }
|
matthiasm@0
|
1000
|
matthiasm@0
|
1001 Feature f1; // logfreqspec
|
matthiasm@0
|
1002 f1.hasTimestamp = true;
|
matthiasm@0
|
1003 f1.timestamp = timestamp;
|
matthiasm@0
|
1004 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
1005 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
1006 }
|
matthiasm@0
|
1007
|
matthiasm@0
|
1008 FeatureSet fs;
|
matthiasm@0
|
1009 fs[1].push_back(f1);
|
matthiasm@3
|
1010 fs[8].push_back(f10);
|
matthiasm@0
|
1011
|
matthiasm@0
|
1012 // deletes
|
matthiasm@0
|
1013 delete[] magnitude;
|
matthiasm@0
|
1014 delete[] nm;
|
matthiasm@0
|
1015
|
matthiasm@0
|
1016 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@7
|
1017 char * pPath;
|
matthiasm@7
|
1018 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
1019
|
matthiasm@7
|
1020
|
matthiasm@0
|
1021 return fs;
|
matthiasm@0
|
1022 }
|
matthiasm@0
|
1023
|
matthiasm@0
|
1024 NNLSChroma::FeatureSet
|
matthiasm@0
|
1025 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
1026 {
|
matthiasm@4
|
1027 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
1028 FeatureSet fsOut;
|
matthiasm@4
|
1029 if (m_fl.size() == 0) return fsOut;
|
matthiasm@9
|
1030 int nChord = m_chordnames.size();
|
matthiasm@0
|
1031 //
|
matthiasm@1
|
1032 /** Calculate Tuning
|
matthiasm@1
|
1033 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
1034 cumulative mean real and imag values)
|
matthiasm@1
|
1035 **/
|
matthiasm@1
|
1036 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
1037 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
1038 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
1039 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
1040 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
1041 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1042
|
matthiasm@1
|
1043 char buffer0 [50];
|
matthiasm@1
|
1044
|
matthiasm@1
|
1045 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1046
|
matthiasm@1
|
1047 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1048
|
matthiasm@1
|
1049 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
1050 Feature f0; // tuning
|
matthiasm@1
|
1051 f0.hasTimestamp = true;
|
matthiasm@1
|
1052 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
1053 f0.label = buffer0;
|
matthiasm@1
|
1054 fsOut[0].push_back(f0);
|
matthiasm@1
|
1055
|
matthiasm@1
|
1056 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
1057 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
1058 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@13
|
1059 **/
|
matthiasm@13
|
1060 cerr << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
1061
|
matthiasm@1
|
1062 float tempValue = 0;
|
matthiasm@1
|
1063 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
1064 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
1065 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
1066 int count = 0;
|
matthiasm@1
|
1067
|
matthiasm@1
|
1068 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
1069 Feature f1 = *i;
|
matthiasm@1
|
1070 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
1071 f2.hasTimestamp = true;
|
matthiasm@1
|
1072 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
1073 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1074
|
matthiasm@1
|
1075 if (m_tuneLocal) {
|
matthiasm@1
|
1076 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
1077 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1078 }
|
matthiasm@1
|
1079
|
matthiasm@1
|
1080 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1081
|
matthiasm@4
|
1082 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
1083 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
1084 f2.values.push_back(tempValue);
|
matthiasm@1
|
1085 }
|
matthiasm@1
|
1086
|
matthiasm@1
|
1087 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
1088 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
1089 vector<float> runningstd;
|
matthiasm@1
|
1090 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
1091 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
1092 }
|
matthiasm@1
|
1093 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
1094 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
1095 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
1096 if (runningstd[i] > 0) {
|
matthiasm@1
|
1097 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
1098 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1099 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
1100 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1101 }
|
matthiasm@1
|
1102 if (f2.values[i] < 0) {
|
matthiasm@1
|
1103 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
1104 }
|
matthiasm@1
|
1105 }
|
matthiasm@1
|
1106 fsOut[2].push_back(f2);
|
matthiasm@1
|
1107 count++;
|
matthiasm@1
|
1108 }
|
matthiasm@13
|
1109 cerr << "done." << endl;
|
matthiasm@1
|
1110
|
matthiasm@1
|
1111 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
1112 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
1113 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
1114 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
1115 bass and treble stacked onto each other).
|
matthiasm@1
|
1116 **/
|
matthiasm@13
|
1117 if (m_dictID == 1) {
|
matthiasm@13
|
1118 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
matthiasm@13
|
1119 } else {
|
matthiasm@13
|
1120 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
matthiasm@13
|
1121 }
|
matthiasm@13
|
1122
|
matthiasm@1
|
1123
|
matthiasm@1
|
1124 vector<vector<float> > chordogram;
|
matthiasm@3
|
1125 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
1126 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
1127 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
1128 count = 0;
|
matthiasm@9
|
1129
|
matthiasm@1
|
1130 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
1131 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
1132 Feature f3; // semitone spectrum
|
matthiasm@1
|
1133 Feature f4; // treble chromagram
|
matthiasm@1
|
1134 Feature f5; // bass chromagram
|
matthiasm@1
|
1135 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1136
|
matthiasm@1
|
1137 f3.hasTimestamp = true;
|
matthiasm@1
|
1138 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1139
|
matthiasm@1
|
1140 f4.hasTimestamp = true;
|
matthiasm@1
|
1141 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1142
|
matthiasm@1
|
1143 f5.hasTimestamp = true;
|
matthiasm@1
|
1144 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1145
|
matthiasm@1
|
1146 f6.hasTimestamp = true;
|
matthiasm@1
|
1147 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1148
|
matthiasm@3
|
1149 float b[256];
|
matthiasm@1
|
1150
|
matthiasm@1
|
1151 bool some_b_greater_zero = false;
|
matthiasm@3
|
1152 float sumb = 0;
|
matthiasm@1
|
1153 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
1154 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
1155 b[i] = f2.values[i];
|
matthiasm@3
|
1156 sumb += b[i];
|
matthiasm@1
|
1157 if (b[i] > 0) {
|
matthiasm@1
|
1158 some_b_greater_zero = true;
|
matthiasm@1
|
1159 }
|
matthiasm@1
|
1160 }
|
matthiasm@1
|
1161
|
matthiasm@1
|
1162 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1163
|
matthiasm@1
|
1164 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
1165 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
1166 float currval;
|
matthiasm@1
|
1167 unsigned iSemitone = 0;
|
matthiasm@1
|
1168
|
matthiasm@1
|
1169 if (some_b_greater_zero) {
|
matthiasm@3
|
1170 if (m_dictID == 1) {
|
matthiasm@1
|
1171 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
1172 currval = 0;
|
matthiasm@3
|
1173 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
1174 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
1175 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
1176 f3.values.push_back(currval);
|
matthiasm@1
|
1177 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
1178 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
1179 iSemitone++;
|
matthiasm@1
|
1180 }
|
matthiasm@1
|
1181
|
matthiasm@1
|
1182 } else {
|
matthiasm@3
|
1183 float x[84+1000];
|
matthiasm@3
|
1184 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@10
|
1185 vector<int> signifIndex;
|
matthiasm@10
|
1186 int index=0;
|
matthiasm@10
|
1187 sumb /= 84.0;
|
matthiasm@10
|
1188 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@10
|
1189 float currval = 0;
|
matthiasm@10
|
1190 currval += b[iNote + 1 + -1];
|
matthiasm@10
|
1191 currval += b[iNote + 1 + 0];
|
matthiasm@10
|
1192 currval += b[iNote + 1 + 1];
|
matthiasm@10
|
1193 if (currval > 0) signifIndex.push_back(index);
|
matthiasm@10
|
1194 f3.values.push_back(0); // fill the values, change later
|
matthiasm@10
|
1195 index++;
|
matthiasm@10
|
1196 }
|
matthiasm@3
|
1197 float rnorm;
|
matthiasm@3
|
1198 float w[84+1000];
|
matthiasm@3
|
1199 float zz[84+1000];
|
matthiasm@3
|
1200 int indx[84+1000];
|
matthiasm@1
|
1201 int mode;
|
matthiasm@10
|
1202 int dictsize = 256*signifIndex.size();
|
matthiasm@10
|
1203 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
matthiasm@10
|
1204 float *curr_dict = new float[dictsize];
|
matthiasm@10
|
1205 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1206 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
matthiasm@10
|
1207 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
matthiasm@10
|
1208 }
|
matthiasm@3
|
1209 }
|
matthiasm@10
|
1210 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@10
|
1211 delete [] curr_dict;
|
matthiasm@10
|
1212 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1213 f3.values[signifIndex[iNote]] = x[iNote];
|
matthiasm@3
|
1214 // cerr << mode << endl;
|
matthiasm@10
|
1215 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
matthiasm@10
|
1216 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
matthiasm@3
|
1217 }
|
matthiasm@1
|
1218 }
|
matthiasm@1
|
1219 }
|
matthiasm@13
|
1220
|
matthiasm@10
|
1221
|
matthiasm@12
|
1222
|
matthiasm@13
|
1223
|
matthiasm@12
|
1224 f4.values = chroma;
|
matthiasm@1
|
1225 f5.values = basschroma;
|
matthiasm@1
|
1226 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
1227 f6.values = chroma;
|
matthiasm@1
|
1228
|
matthiasm@13
|
1229 if (m_doNormalizeChroma > 0) {
|
matthiasm@13
|
1230 vector<float> chromanorm = vector<float>(3,0);
|
matthiasm@13
|
1231 switch (int(m_doNormalizeChroma)) {
|
matthiasm@13
|
1232 case 0: // should never end up here
|
matthiasm@13
|
1233 break;
|
matthiasm@13
|
1234 case 1:
|
matthiasm@13
|
1235 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
matthiasm@13
|
1236 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
matthiasm@13
|
1237 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
matthiasm@13
|
1238 break;
|
matthiasm@13
|
1239 case 2:
|
matthiasm@13
|
1240 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
matthiasm@13
|
1241 chromanorm[0] += *it;
|
matthiasm@13
|
1242 }
|
matthiasm@13
|
1243 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
matthiasm@13
|
1244 chromanorm[1] += *it;
|
matthiasm@13
|
1245 }
|
matthiasm@13
|
1246 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
matthiasm@13
|
1247 chromanorm[2] += *it;
|
matthiasm@13
|
1248 }
|
matthiasm@13
|
1249 break;
|
matthiasm@13
|
1250 case 3:
|
matthiasm@13
|
1251 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
matthiasm@13
|
1252 chromanorm[0] += pow(*it,2);
|
matthiasm@13
|
1253 }
|
matthiasm@13
|
1254 chromanorm[0] = sqrt(chromanorm[0]);
|
matthiasm@13
|
1255 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
matthiasm@13
|
1256 chromanorm[1] += pow(*it,2);
|
matthiasm@13
|
1257 }
|
matthiasm@13
|
1258 chromanorm[1] = sqrt(chromanorm[1]);
|
matthiasm@13
|
1259 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
matthiasm@13
|
1260 chromanorm[2] += pow(*it,2);
|
matthiasm@13
|
1261 }
|
matthiasm@13
|
1262 chromanorm[2] = sqrt(chromanorm[2]);
|
matthiasm@13
|
1263 break;
|
matthiasm@13
|
1264 }
|
matthiasm@13
|
1265 if (chromanorm[0] > 0) {
|
matthiasm@13
|
1266 for (int i = 0; i < f4.values.size(); i++) {
|
matthiasm@13
|
1267 f4.values[i] /= chromanorm[0];
|
matthiasm@13
|
1268 }
|
matthiasm@13
|
1269 }
|
matthiasm@13
|
1270 if (chromanorm[1] > 0) {
|
matthiasm@13
|
1271 for (int i = 0; i < f5.values.size(); i++) {
|
matthiasm@13
|
1272 f5.values[i] /= chromanorm[1];
|
matthiasm@13
|
1273 }
|
matthiasm@13
|
1274 }
|
matthiasm@13
|
1275 if (chromanorm[2] > 0) {
|
matthiasm@13
|
1276 for (int i = 0; i < f6.values.size(); i++) {
|
matthiasm@13
|
1277 f6.values[i] /= chromanorm[2];
|
matthiasm@13
|
1278 }
|
matthiasm@13
|
1279 }
|
matthiasm@13
|
1280
|
matthiasm@13
|
1281 }
|
matthiasm@13
|
1282
|
matthiasm@1
|
1283 // local chord estimation
|
matthiasm@1
|
1284 vector<float> currentChordSalience;
|
matthiasm@1
|
1285 float tempchordvalue = 0;
|
matthiasm@1
|
1286 float sumchordvalue = 0;
|
matthiasm@9
|
1287
|
matthiasm@1
|
1288 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1289 tempchordvalue = 0;
|
matthiasm@1
|
1290 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@9
|
1291 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1292 }
|
matthiasm@1
|
1293 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@9
|
1294 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1295 }
|
matthiasm@1
|
1296 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1297 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1298 }
|
matthiasm@1
|
1299 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1300 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1301 }
|
matthiasm@1
|
1302 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1303
|
matthiasm@1
|
1304 fsOut[3].push_back(f3);
|
matthiasm@1
|
1305 fsOut[4].push_back(f4);
|
matthiasm@1
|
1306 fsOut[5].push_back(f5);
|
matthiasm@1
|
1307 fsOut[6].push_back(f6);
|
matthiasm@1
|
1308 count++;
|
matthiasm@1
|
1309 }
|
matthiasm@13
|
1310 cerr << "done." << endl;
|
matthiasm@13
|
1311
|
matthiasm@10
|
1312
|
matthiasm@3
|
1313 /* Simple chord estimation
|
matthiasm@3
|
1314 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1315 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1316 */
|
matthiasm@13
|
1317 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
matthiasm@3
|
1318 count = 0;
|
matthiasm@3
|
1319 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1320 vector<int> chordSequence;
|
matthiasm@3
|
1321 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1322 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1323 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1324 }
|
matthiasm@4
|
1325 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1326 int startIndex = count + 1;
|
matthiasm@3
|
1327 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
1328
|
matthiasm@10
|
1329 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
1330
|
matthiasm@10
|
1331 vector<int> chordCandidates;
|
matthiasm@10
|
1332 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@10
|
1333 // float currsum = 0;
|
matthiasm@10
|
1334 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1335 // currsum += chordogram[iFrame][iChord];
|
matthiasm@10
|
1336 // }
|
matthiasm@10
|
1337 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@10
|
1338 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1339 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@10
|
1340 chordCandidates.push_back(iChord);
|
matthiasm@10
|
1341 break;
|
matthiasm@10
|
1342 }
|
matthiasm@10
|
1343 }
|
matthiasm@10
|
1344 }
|
matthiasm@10
|
1345 chordCandidates.push_back(nChord-1);
|
matthiasm@10
|
1346 // cerr << chordCandidates.size() << endl;
|
matthiasm@10
|
1347
|
matthiasm@10
|
1348 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@4
|
1349 float maxindex = 0; //... and the index thereof
|
matthiasm@10
|
1350 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@10
|
1351 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@10
|
1352
|
matthiasm@4
|
1353 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1354 // now find the max values on both sides of iWF
|
matthiasm@3
|
1355 // left side:
|
matthiasm@3
|
1356 float maxL = 0;
|
matthiasm@3
|
1357 unsigned maxindL = nChord-1;
|
matthiasm@10
|
1358 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1359 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1360 float currsum = 0;
|
matthiasm@3
|
1361 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1362 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1363 }
|
matthiasm@3
|
1364 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1365 if (currsum > maxL) {
|
matthiasm@3
|
1366 maxL = currsum;
|
matthiasm@3
|
1367 maxindL = iChord;
|
matthiasm@3
|
1368 }
|
matthiasm@3
|
1369 }
|
matthiasm@3
|
1370 // right side:
|
matthiasm@3
|
1371 float maxR = 0;
|
matthiasm@3
|
1372 unsigned maxindR = nChord-1;
|
matthiasm@10
|
1373 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1374 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1375 float currsum = 0;
|
matthiasm@3
|
1376 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1377 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1378 }
|
matthiasm@3
|
1379 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1380 if (currsum > maxR) {
|
matthiasm@3
|
1381 maxR = currsum;
|
matthiasm@3
|
1382 maxindR = iChord;
|
matthiasm@3
|
1383 }
|
matthiasm@3
|
1384 }
|
matthiasm@3
|
1385 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1386 maxval = maxL+maxR;
|
matthiasm@3
|
1387 maxindex = iWF;
|
matthiasm@3
|
1388 bestchordL = maxindL;
|
matthiasm@3
|
1389 bestchordR = maxindR;
|
matthiasm@3
|
1390 }
|
matthiasm@3
|
1391
|
matthiasm@3
|
1392 }
|
matthiasm@3
|
1393 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1394 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1395 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1396 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1397 }
|
matthiasm@3
|
1398 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1399 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1400 }
|
matthiasm@3
|
1401 count++;
|
matthiasm@3
|
1402 }
|
matthiasm@13
|
1403 // cerr << "******* agent finished *******" << endl;
|
matthiasm@3
|
1404 count = 0;
|
matthiasm@3
|
1405 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1406 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1407 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1408 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1409 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1410 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1411 maxindex = iChord;
|
matthiasm@4
|
1412 // cerr << iChord << endl;
|
matthiasm@3
|
1413 }
|
matthiasm@3
|
1414 }
|
matthiasm@3
|
1415 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1416 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1417 count++;
|
matthiasm@3
|
1418 }
|
matthiasm@13
|
1419 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
1420
|
matthiasm@3
|
1421
|
matthiasm@3
|
1422 // mode filter on chordSequence
|
matthiasm@3
|
1423 count = 0;
|
matthiasm@12
|
1424 string oldChord = "";
|
matthiasm@3
|
1425 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1426 Feature f6 = *it;
|
matthiasm@3
|
1427 Feature f7; // chord estimate
|
matthiasm@3
|
1428 f7.hasTimestamp = true;
|
matthiasm@3
|
1429 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1430 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1431 int maxChordCount = 0;
|
matthiasm@3
|
1432 int maxChordIndex = nChord-1;
|
matthiasm@12
|
1433 string maxChord;
|
matthiasm@4
|
1434 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1435 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1436 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1437 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1438 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@7
|
1439 // cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1440 maxChordCount++;
|
matthiasm@4
|
1441 maxChordIndex = chordSequence[i];
|
matthiasm@12
|
1442 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@4
|
1443 }
|
matthiasm@4
|
1444 }
|
matthiasm@4
|
1445 // chordSequence[count] = maxChordIndex;
|
matthiasm@7
|
1446 // cerr << maxChordIndex << endl;
|
matthiasm@12
|
1447 if (oldChord != maxChord) {
|
matthiasm@12
|
1448 oldChord = maxChord;
|
matthiasm@3
|
1449
|
matthiasm@9
|
1450 // char buffer1 [50];
|
matthiasm@9
|
1451 // if (maxChordIndex < nChord - 1) {
|
matthiasm@9
|
1452 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@9
|
1453 // } else {
|
matthiasm@9
|
1454 // sprintf(buffer1, "N");
|
matthiasm@9
|
1455 // }
|
matthiasm@9
|
1456 // f7.label = buffer1;
|
matthiasm@9
|
1457 f7.label = m_chordnames[maxChordIndex];
|
matthiasm@3
|
1458 fsOut[7].push_back(f7);
|
matthiasm@3
|
1459 }
|
matthiasm@3
|
1460 count++;
|
matthiasm@3
|
1461 }
|
matthiasm@13
|
1462 cerr << "done." << endl;
|
matthiasm@0
|
1463 // // musicity
|
matthiasm@0
|
1464 // count = 0;
|
matthiasm@0
|
1465 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1466 // vector<float> musicityValue;
|
matthiasm@0
|
1467 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1468 // Feature f4 = *it;
|
matthiasm@0
|
1469 //
|
matthiasm@0
|
1470 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1471 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1472 // float chromasum = 0;
|
matthiasm@0
|
1473 // float diffsum = 0;
|
matthiasm@0
|
1474 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1475 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1476 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1477 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1478 // }
|
matthiasm@0
|
1479 // }
|
matthiasm@0
|
1480 // diffsum /= chromasum;
|
matthiasm@0
|
1481 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1482 // count++;
|
matthiasm@0
|
1483 // }
|
matthiasm@0
|
1484 //
|
matthiasm@0
|
1485 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1486 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1487 // musicityThreshold = 0.74;
|
matthiasm@0
|
1488 // }
|
matthiasm@0
|
1489 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1490 // musicityThreshold = 0.77;
|
matthiasm@0
|
1491 // }
|
matthiasm@0
|
1492 //
|
matthiasm@0
|
1493 // count = 0;
|
matthiasm@0
|
1494 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1495 // Feature f4 = *it;
|
matthiasm@0
|
1496 // Feature f8; // musicity
|
matthiasm@0
|
1497 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1498 //
|
matthiasm@0
|
1499 // f8.hasTimestamp = true;
|
matthiasm@0
|
1500 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1501 // f9.hasTimestamp = true;
|
matthiasm@0
|
1502 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1503 //
|
matthiasm@0
|
1504 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1505 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1506 // int musicityCount = 0;
|
matthiasm@0
|
1507 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1508 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1509 // }
|
matthiasm@0
|
1510 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1511 //
|
matthiasm@0
|
1512 // if (isSpeech) {
|
matthiasm@0
|
1513 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1514 // f9.label = "Speech";
|
matthiasm@0
|
1515 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1516 // oldlabeltype = 2;
|
matthiasm@0
|
1517 // }
|
matthiasm@0
|
1518 // } else {
|
matthiasm@0
|
1519 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1520 // f9.label = "Music";
|
matthiasm@0
|
1521 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1522 // oldlabeltype = 1;
|
matthiasm@0
|
1523 // }
|
matthiasm@0
|
1524 // }
|
matthiasm@0
|
1525 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1526 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1527 // count++;
|
matthiasm@0
|
1528 // }
|
matthiasm@0
|
1529 return fsOut;
|
matthiasm@0
|
1530
|
matthiasm@0
|
1531 }
|
matthiasm@0
|
1532
|