matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@3
|
6 #include <fstream>
|
matthiasm@0
|
7 #include <sstream>
|
matthiasm@0
|
8 #include <cassert>
|
matthiasm@7
|
9 #include <cstdlib>
|
matthiasm@0
|
10 #include <cstdio>
|
matthiasm@7
|
11 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
12 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
13 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
14 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
15 #include "nnls.h"
|
matthiasm@0
|
16 // #include "cblas.h"
|
matthiasm@0
|
17 #include "chorddict.cpp"
|
matthiasm@0
|
18 using namespace std;
|
matthiasm@7
|
19 using namespace boost;
|
matthiasm@0
|
20
|
matthiasm@0
|
21 const float sinvalue = 0.866025404;
|
matthiasm@0
|
22 const float cosvalue = -0.5;
|
matthiasm@0
|
23 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
|
matthiasm@0
|
24 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
|
matthiasm@0
|
25 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
|
matthiasm@0
|
// Display names for the 24 chroma bins: entries 0-11 are the twelve pitch
// classes (starting at A) tagged "(bass)", entries 12-23 are the same pitch
// classes without the tag.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D",
    "Eb", "E", "F", "F#", "G", "Ab"
};

// bassnames[root][interval]: spelling of the note `interval` semitones above
// the root, for the twelve roots A, Bb, B, ..., Ab (same order as notenames).
// Intervals 1, 6 and 8 have no spelling and are left as empty strings.
const char* bassnames[12][12] = {
    { "A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#" },
    { "Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"  },
    { "B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#" },
    { "C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"  },
    { "C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#" },
    { "D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#" },
    { "Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"  },
    { "E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#" },
    { "F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"  },
    { "F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#" },
    { "G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#" },
    { "Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"  }
};
|
matthiasm@0
|
43 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
44 const int nNote = 256;
|
matthiasm@0
|
45
|
matthiasm@0
|
46 /** Special Convolution
|
matthiasm@0
|
47 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
48 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
49 as the first (last) valid convolution bin.
|
matthiasm@0
|
50 **/
|
matthiasm@0
|
51
|
matthiasm@0
|
52 const bool debug_on = false;
|
matthiasm@0
|
53
|
matthiasm@0
|
/**
 * Convolve `convolvee` with an odd-length `kernel`.
 *
 * The result has exactly as many entries as `convolvee`. The valid core holds
 * the ordinary convolution values; the first and last kernel.size()/2 entries
 * are padded with the first and last valid value respectively.
 *
 * The result buffer used to be hard-coded to 256 entries, which wrote out of
 * bounds for longer inputs; it is now sized from the input. If the convolvee
 * is shorter than the kernel there is no valid core and an all-zero vector of
 * the convolvee's length is returned.
 *
 * @param convolvee signal to smooth
 * @param kernel    convolution kernel; its length must be odd (asserted)
 * @return padded convolution, same length as `convolvee`
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());

    assert(lenKernel % 2 != 0); // no exception handling !!!

    std::vector<float> Z(lenConvolvee, 0.0f);
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z; // no valid convolution bin exists
    }

    const int half = lenKernel / 2;

    // valid core: every output bin for which the kernel fits entirely
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
|
matthiasm@0
|
81
|
matthiasm@0
|
82 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
83 // {
|
matthiasm@0
|
84 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
85 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
86 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
87 // }
|
matthiasm@0
|
88 // return freq;
|
matthiasm@0
|
89 // }
|
matthiasm@0
|
90
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * Returns 0.5 + 0.5 * cos(2*pi*(x - centre)/width) while x lies within half a
 * width of centre, and 0 everywhere else.
 *
 * @param x      position at which the pulse is evaluated
 * @param centre position of the pulse maximum (value 1)
 * @param width  full width of the pulse; a non-positive width yields 0
 *               (a zero width used to produce NaN at x == centre)
 * @return pulse value in [0, 1]
 */
float cospuls(float x, float centre, float width)
{
    if (!(width > 0.0f)) {
        return 0.0f; // degenerate pulse: avoid dividing by zero below
    }
    const float offset = x - centre;
    // std::fabs keeps this a floating-point comparison regardless of which
    // abs overloads happen to be visible.
    if (std::fabs(offset) <= 0.5 * width) {
        const float recipwidth = 1.0 / width;
        return static_cast<float>(std::cos(offset * 2 * M_PI * recipwidth) * .5 + .5);
    }
    return 0.0f;
}
|
matthiasm@0
|
99
|
matthiasm@0
|
100 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
101 {
|
matthiasm@0
|
102 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
103 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
104 // now scale to correct for note density
|
matthiasm@0
|
105 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
106 if (x > 0) {
|
matthiasm@0
|
107 out = out / (c * x);
|
matthiasm@0
|
108 } else {
|
matthiasm@0
|
109 out = 0;
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111 return out;
|
matthiasm@0
|
112 }
|
matthiasm@0
|
113
|
matthiasm@0
|
114 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
115
|
matthiasm@0
|
116 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
117 int minoctave = 0; // this must be 0
|
matthiasm@0
|
118 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
119 int oversampling = 80;
|
matthiasm@0
|
120
|
matthiasm@0
|
121 // linear frequency vector
|
matthiasm@0
|
122 vector<float> fft_f;
|
matthiasm@0
|
123 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
124 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
125 }
|
matthiasm@0
|
126 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
127
|
matthiasm@0
|
128 // linear oversampled frequency vector
|
matthiasm@0
|
129 vector<float> oversampled_f;
|
matthiasm@0
|
130 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
131 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
132 }
|
matthiasm@0
|
133
|
matthiasm@0
|
134 // pitch-spaced frequency vector
|
matthiasm@0
|
135 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
136 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
137 vector<float> cq_f;
|
matthiasm@0
|
138 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
139 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
140 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
141 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
142 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
143 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
144 }
|
matthiasm@0
|
145 }
|
matthiasm@0
|
146 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
147 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
148
|
matthiasm@0
|
149 int nFFT = fft_f.size();
|
matthiasm@0
|
150
|
matthiasm@0
|
151 vector<float> fft_activation;
|
matthiasm@0
|
152 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
153 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
154 fft_activation.push_back(cosp);
|
matthiasm@0
|
155 // cerr << cosp << endl;
|
matthiasm@0
|
156 }
|
matthiasm@0
|
157
|
matthiasm@0
|
158 float cq_activation;
|
matthiasm@0
|
159 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
160 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
161 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
162 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
163 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
164 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
165 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
166 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
167 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
168 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
169 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
170 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
171 }
|
matthiasm@0
|
172 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
173 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
174 // }
|
matthiasm@0
|
175 }
|
matthiasm@0
|
176 }
|
matthiasm@0
|
177 }
|
matthiasm@0
|
178 return true;
|
matthiasm@0
|
179 }
|
matthiasm@0
|
180
|
matthiasm@3
|
181 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
182 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
183 int minoctave = 0; // this must be 0
|
matthiasm@1
|
184 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
185 float s_param = 0.7;
|
matthiasm@1
|
186
|
matthiasm@1
|
187 // pitch-spaced frequency vector
|
matthiasm@1
|
188 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
189 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
190 vector<float> cq_f;
|
matthiasm@1
|
191 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
192 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
193 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
194 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
195 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
196 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
197 }
|
matthiasm@1
|
198 }
|
matthiasm@1
|
199 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
200 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
201
|
matthiasm@1
|
202 float curr_f;
|
matthiasm@1
|
203 float floatbin;
|
matthiasm@1
|
204 float curr_amp;
|
matthiasm@1
|
205 // now for every combination calculate the matrix element
|
matthiasm@1
|
206 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
207 // cerr << iOut << endl;
|
matthiasm@1
|
208 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
209 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
210 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
211 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
212 // cerr << floatbin << endl;
|
matthiasm@1
|
213 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
214 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
215 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
216 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
217 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
218 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
219 }
|
matthiasm@1
|
220 }
|
matthiasm@3
|
221 }
|
matthiasm@1
|
222 }
|
matthiasm@3
|
223
|
matthiasm@3
|
224
|
matthiasm@1
|
225 }
|
matthiasm@1
|
226
|
matthiasm@7
|
/**
 * Look up an environment variable.
 *
 * @param key name of the variable
 * @return its value, or an empty string if the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * const raw = getenv( key.c_str() );
    return (raw == NULL) ? std::string() : std::string(raw);
}
|
matthiasm@7
|
236
|
matthiasm@7
|
237
|
matthiasm@7
|
238 int chordDictionary(float *mchorddict, string *chordnames) {
|
matthiasm@7
|
239 // ifstream chordDictFile;
|
matthiasm@7
|
240 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
matthiasm@7
|
241 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
matthiasm@7
|
242 typedef tokenizer<char_separator<char> > Tok;
|
matthiasm@7
|
243 // char_separator<char> sep; // default constructed
|
matthiasm@7
|
244 char_separator<char> sep(",; ",":");
|
matthiasm@7
|
245 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
246 string line;
|
matthiasm@7
|
247 int iElement = 0;
|
matthiasm@7
|
248 int nChord = 0;
|
matthiasm@7
|
249
|
matthiasm@7
|
250 vector<string> loadedChordNames;
|
matthiasm@7
|
251 vector<float> loadedChordDict;
|
matthiasm@7
|
252 if (chordDictFile.is_open()) {
|
matthiasm@7
|
253 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
matthiasm@7
|
254 // first, get the chord definition
|
matthiasm@7
|
255 string chordType;
|
matthiasm@7
|
256 vector<float> tempPCVector;
|
matthiasm@7
|
257 // cerr << line << endl;
|
matthiasm@7
|
258 if (!line.empty() && line.substr(0,1) != "#") {
|
matthiasm@7
|
259 Tok tok(line, sep);
|
matthiasm@7
|
260 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
matthiasm@7
|
261 string tempString = *tok_iter;
|
matthiasm@7
|
262 // cerr << tempString << endl;
|
matthiasm@7
|
263 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
matthiasm@7
|
264 if (tempString == ":") {
|
matthiasm@7
|
265 chordType = "";
|
matthiasm@7
|
266 } else {
|
matthiasm@7
|
267 chordType = tempString;
|
matthiasm@7
|
268 tok_iter++; // is this cheating ? :)
|
matthiasm@7
|
269 }
|
matthiasm@7
|
270 } else {
|
matthiasm@7
|
271 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
matthiasm@7
|
272 }
|
matthiasm@7
|
273 }
|
matthiasm@7
|
274
|
matthiasm@7
|
275 // now make all 12 chords of every type
|
matthiasm@7
|
276 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
matthiasm@7
|
277 // add bass slash notation
|
matthiasm@7
|
278 string slashNotation = "";
|
matthiasm@7
|
279 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
matthiasm@7
|
280 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
matthiasm@7
|
281 slashNotation = bassnames[iSemitone][kSemitone];
|
matthiasm@7
|
282 }
|
matthiasm@7
|
283 }
|
matthiasm@7
|
284 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
matthiasm@7
|
285 cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
matthiasm@7
|
286 loadedChordDict.push_back(0.5 * tempPCVector[(kSemitone - iSemitone + 12) % 12] + 0.5 * tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
287 }
|
matthiasm@7
|
288 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
matthiasm@7
|
289 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
290 }
|
matthiasm@7
|
291 ostringstream os;
|
matthiasm@7
|
292 if (slashNotation.empty()) {
|
matthiasm@7
|
293 os << notenames[12+iSemitone] << chordType;
|
matthiasm@7
|
294 } else {
|
matthiasm@7
|
295 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
matthiasm@7
|
296 }
|
matthiasm@7
|
297
|
matthiasm@7
|
298 loadedChordNames.push_back(os.str());
|
matthiasm@7
|
299 }
|
matthiasm@7
|
300 }
|
matthiasm@7
|
301 }
|
matthiasm@7
|
302 // N type
|
matthiasm@7
|
303 loadedChordNames.push_back("N");
|
matthiasm@7
|
304 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
matthiasm@7
|
305 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
306
|
matthiasm@7
|
307 // normalise
|
matthiasm@7
|
308 float sum = 0;
|
matthiasm@7
|
309 for (int i = 0; i < loadedChordDict.size(); i++) {
|
matthiasm@7
|
310 sum += pow(loadedChordDict[i],2);
|
matthiasm@7
|
311 if (i % 24 == 23) {
|
matthiasm@7
|
312 float invertedsum = 1.0/sqrt(sum);
|
matthiasm@7
|
313 for (int k = 0; k < 24; k++) {
|
matthiasm@7
|
314 loadedChordDict[i-k] *= invertedsum;
|
matthiasm@7
|
315 }
|
matthiasm@7
|
316 sum = 0;
|
matthiasm@7
|
317 }
|
matthiasm@7
|
318
|
matthiasm@7
|
319 }
|
matthiasm@7
|
320
|
matthiasm@7
|
321
|
matthiasm@7
|
322 nChord = 0;
|
matthiasm@7
|
323 for (int i = 0; i < loadedChordNames.size(); i++) {
|
matthiasm@7
|
324 chordnames[i] = loadedChordNames[i];
|
matthiasm@7
|
325 cerr << "in chordnames "<< chordnames[i] << endl;
|
matthiasm@7
|
326 nChord++;
|
matthiasm@7
|
327 }
|
matthiasm@7
|
328 chordDictFile.close();
|
matthiasm@7
|
329
|
matthiasm@7
|
330
|
matthiasm@7
|
331 mchorddict = new float[nChord*24];
|
matthiasm@7
|
332 for (int i = 0; i < nChord*24; i++) {
|
matthiasm@7
|
333 mchorddict[i] = loadedChordDict[i];
|
matthiasm@7
|
334 }
|
matthiasm@7
|
335
|
matthiasm@7
|
336 // mchordnames = new string[nChord];
|
matthiasm@7
|
337 // for (int i = 0; i < nChord; i++) {
|
matthiasm@7
|
338 // mchordnames[i] = loadedChordNames[i];
|
matthiasm@7
|
339 // }
|
matthiasm@7
|
340
|
matthiasm@7
|
341 // return loadedChordNames;
|
matthiasm@7
|
342 } else {// use default from chorddict.cpp
|
matthiasm@7
|
343 mchorddict = new float[nChorddict];
|
matthiasm@7
|
344 for (int i = 0; i < nChorddict; i++) {
|
matthiasm@7
|
345 mchorddict[i] = chorddict[i];
|
matthiasm@7
|
346 }
|
matthiasm@7
|
347
|
matthiasm@7
|
348 nChord = nChorddict/24;
|
matthiasm@7
|
349 // mchordnames = new string[nChorddict/24];
|
matthiasm@7
|
350 char buffer1 [50];
|
matthiasm@7
|
351 for (int i = 0; i < nChorddict/24; i++) {
|
matthiasm@7
|
352 if (i < nChorddict/24 - 1) {
|
matthiasm@7
|
353 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
matthiasm@7
|
354 } else {
|
matthiasm@7
|
355 sprintf(buffer1, "N");
|
matthiasm@7
|
356 }
|
matthiasm@7
|
357 ostringstream os;
|
matthiasm@7
|
358 os << buffer1;
|
matthiasm@7
|
359 // loadedChordNames.push_back(os.str());
|
matthiasm@7
|
360 chordnames[i] = os.str();
|
matthiasm@7
|
361 }
|
matthiasm@7
|
362
|
matthiasm@7
|
363 }
|
matthiasm@7
|
364 cerr << "before leaving" << chordnames[1] << endl;
|
matthiasm@7
|
365 return nChord;
|
matthiasm@7
|
366 }
|
matthiasm@0
|
367
|
matthiasm@0
|
368 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
369 Plugin(inputSampleRate),
|
matthiasm@0
|
370 m_fl(0),
|
matthiasm@0
|
371 m_blockSize(0),
|
matthiasm@0
|
372 m_stepSize(0),
|
matthiasm@0
|
373 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
374 m_meanTuning0(0),
|
matthiasm@0
|
375 m_meanTuning1(0),
|
matthiasm@0
|
376 m_meanTuning2(0),
|
matthiasm@0
|
377 m_localTuning0(0),
|
matthiasm@0
|
378 m_localTuning1(0),
|
matthiasm@0
|
379 m_localTuning2(0),
|
matthiasm@4
|
380 m_paling(1.0),
|
matthiasm@3
|
381 m_preset(0.0),
|
matthiasm@0
|
382 m_localTuning(0),
|
matthiasm@0
|
383 m_kernelValue(0),
|
matthiasm@0
|
384 m_kernelFftIndex(0),
|
matthiasm@0
|
385 m_kernelNoteIndex(0),
|
matthiasm@1
|
386 m_dict(0),
|
matthiasm@0
|
387 m_tuneLocal(false),
|
matthiasm@7
|
388 m_dictID(0),
|
matthiasm@7
|
389 m_chorddict(0),
|
matthiasm@7
|
390 m_chordnames(0)
|
matthiasm@0
|
391 {
|
matthiasm@0
|
392 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
393
|
matthiasm@7
|
394 // make the *note* dictionary matrix
|
matthiasm@3
|
395 m_dict = new float[nNote * 84];
|
matthiasm@3
|
396 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
397 dictionaryMatrix(m_dict);
|
matthiasm@7
|
398
|
matthiasm@7
|
399 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@7
|
400 string *chordnames;
|
matthiasm@7
|
401 chordnames = new string[1000];
|
matthiasm@7
|
402 int nchord = chordDictionary(m_chorddict, chordnames);
|
matthiasm@7
|
403 cerr << nchord << endl;
|
matthiasm@7
|
404 for (int i = 0; i < nchord; i++) {
|
matthiasm@7
|
405 m_chordnames.push_back(chordnames[i]);
|
matthiasm@7
|
406 }
|
matthiasm@7
|
407 delete [] chordnames;
|
matthiasm@0
|
408 }
|
matthiasm@0
|
409
|
matthiasm@0
|
410
|
matthiasm@0
|
411 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
412 {
|
matthiasm@0
|
413 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
414 delete [] m_dict;
|
matthiasm@7
|
415 delete [] m_chorddict;
|
matthiasm@7
|
416 // delete m_chordnames;
|
matthiasm@0
|
417 }
|
matthiasm@0
|
418
|
matthiasm@0
|
419 string
|
matthiasm@0
|
420 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
421 {
|
matthiasm@0
|
422 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
423 return "nnls_chroma";
|
matthiasm@0
|
424 }
|
matthiasm@0
|
425
|
matthiasm@0
|
426 string
|
matthiasm@0
|
427 NNLSChroma::getName() const
|
matthiasm@0
|
428 {
|
matthiasm@0
|
429 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
430 return "NNLS Chroma";
|
matthiasm@0
|
431 }
|
matthiasm@0
|
432
|
matthiasm@0
|
433 string
|
matthiasm@0
|
434 NNLSChroma::getDescription() const
|
matthiasm@0
|
435 {
|
matthiasm@0
|
436 // Return something helpful here!
|
matthiasm@0
|
437 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@4
|
438 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
|
matthiasm@0
|
439 }
|
matthiasm@0
|
440
|
matthiasm@0
|
441 string
|
matthiasm@0
|
442 NNLSChroma::getMaker() const
|
matthiasm@0
|
443 {
|
matthiasm@0
|
444 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
445 // Your name here
|
matthiasm@0
|
446 return "Matthias Mauch";
|
matthiasm@0
|
447 }
|
matthiasm@0
|
448
|
matthiasm@0
|
449 int
|
matthiasm@0
|
450 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
451 {
|
matthiasm@0
|
452 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
453 // Increment this each time you release a version that behaves
|
matthiasm@0
|
454 // differently from the previous one
|
matthiasm@0
|
455 return 1;
|
matthiasm@0
|
456 }
|
matthiasm@0
|
457
|
matthiasm@0
|
458 string
|
matthiasm@0
|
459 NNLSChroma::getCopyright() const
|
matthiasm@0
|
460 {
|
matthiasm@0
|
461 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
462 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
463 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
464 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
465 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
466 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
467 }
|
matthiasm@0
|
468
|
matthiasm@0
|
469 NNLSChroma::InputDomain
|
matthiasm@0
|
470 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
471 {
|
matthiasm@0
|
472 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
473 return FrequencyDomain;
|
matthiasm@0
|
474 }
|
matthiasm@0
|
475
|
matthiasm@0
|
476 size_t
|
matthiasm@0
|
477 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
478 {
|
matthiasm@0
|
479 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
480 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
481 }
|
matthiasm@0
|
482
|
matthiasm@0
|
483 size_t
|
matthiasm@0
|
484 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
485 {
|
matthiasm@0
|
486 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
487 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
488 // means the same as the block size for TimeDomain
|
matthiasm@0
|
489 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
490 }
|
matthiasm@0
|
491
|
matthiasm@0
|
492 size_t
|
matthiasm@0
|
493 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
494 {
|
matthiasm@0
|
495 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
496 return 1;
|
matthiasm@0
|
497 }
|
matthiasm@0
|
498
|
matthiasm@0
|
499 size_t
|
matthiasm@0
|
500 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
501 {
|
matthiasm@0
|
502 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
503 return 1;
|
matthiasm@0
|
504 }
|
matthiasm@0
|
505
|
matthiasm@0
|
506 NNLSChroma::ParameterList
|
matthiasm@0
|
507 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
508 {
|
matthiasm@0
|
509 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
510 ParameterList list;
|
matthiasm@0
|
511
|
matthiasm@3
|
512 ParameterDescriptor d3;
|
matthiasm@3
|
513 d3.identifier = "preset";
|
matthiasm@3
|
514 d3.name = "preset";
|
matthiasm@3
|
515 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
516 d3.unit = "";
|
matthiasm@3
|
517 d3.isQuantized = true;
|
matthiasm@3
|
518 d3.quantizeStep = 1;
|
matthiasm@3
|
519 d3.minValue = 0.0;
|
matthiasm@4
|
520 d3.maxValue = 3.0;
|
matthiasm@3
|
521 d3.defaultValue = 0.0;
|
matthiasm@3
|
522 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
523 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
524 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
525 d3.valueNames.push_back("manual");
|
matthiasm@3
|
526 list.push_back(d3);
|
matthiasm@4
|
527
|
matthiasm@4
|
528 // ParameterDescriptor d0;
|
matthiasm@4
|
529 // d0.identifier = "notedict";
|
matthiasm@4
|
530 // d0.name = "note dictionary";
|
matthiasm@4
|
531 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
532 // d0.unit = "";
|
matthiasm@4
|
533 // d0.minValue = 0;
|
matthiasm@4
|
534 // d0.maxValue = 1;
|
matthiasm@4
|
535 // d0.defaultValue = 0;
|
matthiasm@4
|
536 // d0.isQuantized = true;
|
matthiasm@4
|
537 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
538 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
539 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
540 // list.push_back(d0);
|
matthiasm@4
|
541
|
matthiasm@4
|
542 ParameterDescriptor d1;
|
matthiasm@4
|
543 d1.identifier = "tuningmode";
|
matthiasm@4
|
544 d1.name = "tuning mode";
|
matthiasm@4
|
545 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
546 d1.unit = "";
|
matthiasm@4
|
547 d1.minValue = 0;
|
matthiasm@4
|
548 d1.maxValue = 1;
|
matthiasm@4
|
549 d1.defaultValue = 0;
|
matthiasm@4
|
550 d1.isQuantized = true;
|
matthiasm@4
|
551 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
552 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
553 d1.quantizeStep = 1.0;
|
matthiasm@4
|
554 list.push_back(d1);
|
matthiasm@4
|
555
|
matthiasm@4
|
556 // ParameterDescriptor d2;
|
matthiasm@4
|
557 // d2.identifier = "paling";
|
matthiasm@4
|
558 // d2.name = "spectral paling";
|
matthiasm@4
|
559 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
560 // d2.unit = "";
|
matthiasm@4
|
561 // d2.isQuantized = true;
|
matthiasm@4
|
562 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
563 // d2.minValue = 0.0;
|
matthiasm@4
|
564 // d2.maxValue = 1.0;
|
matthiasm@4
|
565 // d2.defaultValue = 1.0;
|
matthiasm@4
|
566 // d2.isQuantized = false;
|
matthiasm@4
|
567 // list.push_back(d2);
|
matthiasm@4
|
568
|
matthiasm@0
|
569 return list;
|
matthiasm@0
|
570 }
|
matthiasm@0
|
571
|
matthiasm@0
|
572 float
|
matthiasm@0
|
573 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
574 {
|
matthiasm@3
|
575 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
576 if (identifier == "notedict") {
|
matthiasm@0
|
577 return m_dictID;
|
matthiasm@0
|
578 }
|
matthiasm@0
|
579
|
matthiasm@0
|
580 if (identifier == "paling") {
|
matthiasm@0
|
581 return m_paling;
|
matthiasm@0
|
582 }
|
matthiasm@0
|
583
|
matthiasm@0
|
584 if (identifier == "tuningmode") {
|
matthiasm@0
|
585 if (m_tuneLocal) {
|
matthiasm@0
|
586 return 1.0;
|
matthiasm@0
|
587 } else {
|
matthiasm@0
|
588 return 0.0;
|
matthiasm@0
|
589 }
|
matthiasm@0
|
590 }
|
matthiasm@3
|
591 if (identifier == "preset") {
|
matthiasm@3
|
592 return m_preset;
|
matthiasm@3
|
593 }
|
matthiasm@0
|
594 return 0;
|
matthiasm@0
|
595
|
matthiasm@0
|
596 }
|
matthiasm@0
|
597
|
matthiasm@0
|
598 void
|
matthiasm@0
|
599 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
600 {
|
matthiasm@3
|
601 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
602 if (identifier == "notedict") {
|
matthiasm@0
|
603 m_dictID = (int) value;
|
matthiasm@0
|
604 }
|
matthiasm@0
|
605
|
matthiasm@0
|
606 if (identifier == "paling") {
|
matthiasm@0
|
607 m_paling = value;
|
matthiasm@0
|
608 }
|
matthiasm@0
|
609
|
matthiasm@0
|
610 if (identifier == "tuningmode") {
|
matthiasm@0
|
611 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
612 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
613 }
|
matthiasm@3
|
614 if (identifier == "preset") {
|
matthiasm@3
|
615 m_preset = value;
|
matthiasm@3
|
616 if (m_preset == 0.0) {
|
matthiasm@3
|
617 m_tuneLocal = false;
|
matthiasm@3
|
618 m_paling = 1.0;
|
matthiasm@3
|
619 m_dictID = 0.0;
|
matthiasm@3
|
620 }
|
matthiasm@3
|
621 if (m_preset == 1.0) {
|
matthiasm@3
|
622 m_tuneLocal = false;
|
matthiasm@3
|
623 m_paling = 1.0;
|
matthiasm@3
|
624 m_dictID = 1.0;
|
matthiasm@3
|
625 }
|
matthiasm@3
|
626 if (m_preset == 2.0) {
|
matthiasm@3
|
627 m_tuneLocal = false;
|
matthiasm@3
|
628 m_paling = 0.7;
|
matthiasm@3
|
629 m_dictID = 0.0;
|
matthiasm@3
|
630 }
|
matthiasm@3
|
631 }
|
matthiasm@0
|
632 }
|
matthiasm@0
|
633
|
matthiasm@0
|
634 NNLSChroma::ProgramList
|
matthiasm@0
|
635 NNLSChroma::getPrograms() const
|
matthiasm@0
|
636 {
|
matthiasm@0
|
637 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
638 ProgramList list;
|
matthiasm@0
|
639
|
matthiasm@0
|
640 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
641 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
642
|
matthiasm@0
|
643 return list;
|
matthiasm@0
|
644 }
|
matthiasm@0
|
645
|
matthiasm@0
|
646 string
|
matthiasm@0
|
647 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
648 {
|
matthiasm@0
|
649 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
650 return ""; // no programs
|
matthiasm@0
|
651 }
|
matthiasm@0
|
652
|
matthiasm@0
|
653 void
|
matthiasm@0
|
654 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
655 {
|
matthiasm@0
|
656 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
657 }
|
matthiasm@0
|
658
|
matthiasm@0
|
659
|
matthiasm@0
|
660 NNLSChroma::OutputList
|
matthiasm@0
|
661 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
662 {
|
matthiasm@0
|
663 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
664 OutputList list;
|
matthiasm@0
|
665
|
matthiasm@0
|
666 // Make chroma names for the binNames property
|
matthiasm@0
|
667 vector<string> chromanames;
|
matthiasm@0
|
668 vector<string> bothchromanames;
|
matthiasm@0
|
669 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
670 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
671 if (iNote < 12) {
|
matthiasm@0
|
672 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
673 }
|
matthiasm@0
|
674 }
|
matthiasm@0
|
675
|
matthiasm@1
|
676 // int nNote = 84;
|
matthiasm@0
|
677
|
matthiasm@0
|
678 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
679 // Every plugin must have at least one output.
|
matthiasm@0
|
680
|
matthiasm@0
|
681 OutputDescriptor d0;
|
matthiasm@0
|
682 d0.identifier = "tuning";
|
matthiasm@0
|
683 d0.name = "Tuning";
|
matthiasm@0
|
684 d0.description = "The concert pitch.";
|
matthiasm@0
|
685 d0.unit = "Hz";
|
matthiasm@0
|
686 d0.hasFixedBinCount = true;
|
matthiasm@0
|
687 d0.binCount = 0;
|
matthiasm@0
|
688 d0.hasKnownExtents = true;
|
matthiasm@0
|
689 d0.minValue = 427.47;
|
matthiasm@0
|
690 d0.maxValue = 452.89;
|
matthiasm@0
|
691 d0.isQuantized = false;
|
matthiasm@0
|
692 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
693 d0.hasDuration = false;
|
matthiasm@0
|
694 list.push_back(d0);
|
matthiasm@0
|
695
|
matthiasm@0
|
696 OutputDescriptor d1;
|
matthiasm@0
|
697 d1.identifier = "logfreqspec";
|
matthiasm@0
|
698 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
699 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
700 d1.unit = "";
|
matthiasm@0
|
701 d1.hasFixedBinCount = true;
|
matthiasm@0
|
702 d1.binCount = nNote;
|
matthiasm@0
|
703 d1.hasKnownExtents = false;
|
matthiasm@0
|
704 d1.isQuantized = false;
|
matthiasm@0
|
705 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
706 d1.hasDuration = false;
|
matthiasm@0
|
707 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
708 list.push_back(d1);
|
matthiasm@0
|
709
|
matthiasm@0
|
710 OutputDescriptor d2;
|
matthiasm@0
|
711 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
712 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
713 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
714 d2.unit = "";
|
matthiasm@0
|
715 d2.hasFixedBinCount = true;
|
matthiasm@0
|
716 d2.binCount = 256;
|
matthiasm@0
|
717 d2.hasKnownExtents = false;
|
matthiasm@0
|
718 d2.isQuantized = false;
|
matthiasm@0
|
719 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
720 d2.hasDuration = false;
|
matthiasm@0
|
721 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
722 list.push_back(d2);
|
matthiasm@0
|
723
|
matthiasm@0
|
724 OutputDescriptor d3;
|
matthiasm@0
|
725 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
726 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
727 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
728 d3.unit = "";
|
matthiasm@0
|
729 d3.hasFixedBinCount = true;
|
matthiasm@0
|
730 d3.binCount = 84;
|
matthiasm@0
|
731 d3.hasKnownExtents = false;
|
matthiasm@0
|
732 d3.isQuantized = false;
|
matthiasm@0
|
733 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
734 d3.hasDuration = false;
|
matthiasm@0
|
735 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
736 list.push_back(d3);
|
matthiasm@0
|
737
|
matthiasm@0
|
738 OutputDescriptor d4;
|
matthiasm@0
|
739 d4.identifier = "chroma";
|
matthiasm@0
|
740 d4.name = "Chromagram";
|
matthiasm@0
|
741 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
742 d4.unit = "";
|
matthiasm@0
|
743 d4.hasFixedBinCount = true;
|
matthiasm@0
|
744 d4.binCount = 12;
|
matthiasm@0
|
745 d4.binNames = chromanames;
|
matthiasm@0
|
746 d4.hasKnownExtents = false;
|
matthiasm@0
|
747 d4.isQuantized = false;
|
matthiasm@0
|
748 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
749 d4.hasDuration = false;
|
matthiasm@0
|
750 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
751 list.push_back(d4);
|
matthiasm@0
|
752
|
matthiasm@0
|
753 OutputDescriptor d5;
|
matthiasm@0
|
754 d5.identifier = "basschroma";
|
matthiasm@0
|
755 d5.name = "Bass Chromagram";
|
matthiasm@0
|
756 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
757 d5.unit = "";
|
matthiasm@0
|
758 d5.hasFixedBinCount = true;
|
matthiasm@0
|
759 d5.binCount = 12;
|
matthiasm@0
|
760 d5.binNames = chromanames;
|
matthiasm@0
|
761 d5.hasKnownExtents = false;
|
matthiasm@0
|
762 d5.isQuantized = false;
|
matthiasm@0
|
763 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
764 d5.hasDuration = false;
|
matthiasm@0
|
765 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
766 list.push_back(d5);
|
matthiasm@0
|
767
|
matthiasm@0
|
768 OutputDescriptor d6;
|
matthiasm@0
|
769 d6.identifier = "bothchroma";
|
matthiasm@0
|
770 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
771 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
772 d6.unit = "";
|
matthiasm@0
|
773 d6.hasFixedBinCount = true;
|
matthiasm@0
|
774 d6.binCount = 24;
|
matthiasm@0
|
775 d6.binNames = bothchromanames;
|
matthiasm@0
|
776 d6.hasKnownExtents = false;
|
matthiasm@0
|
777 d6.isQuantized = false;
|
matthiasm@0
|
778 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
779 d6.hasDuration = false;
|
matthiasm@0
|
780 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
781 list.push_back(d6);
|
matthiasm@0
|
782
|
matthiasm@0
|
783 OutputDescriptor d7;
|
matthiasm@0
|
784 d7.identifier = "simplechord";
|
matthiasm@0
|
785 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
786 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
787 d7.unit = "";
|
matthiasm@0
|
788 d7.hasFixedBinCount = true;
|
matthiasm@0
|
789 d7.binCount = 0;
|
matthiasm@0
|
790 d7.hasKnownExtents = false;
|
matthiasm@0
|
791 d7.isQuantized = false;
|
matthiasm@0
|
792 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
793 d7.hasDuration = false;
|
matthiasm@0
|
794 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
795 list.push_back(d7);
|
matthiasm@0
|
796
|
matthiasm@1
|
797 // OutputDescriptor d8;
|
matthiasm@1
|
798 // d8.identifier = "inconsistency";
|
matthiasm@1
|
799 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
800 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
801 // d8.unit = "";
|
matthiasm@1
|
802 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
803 // d8.binCount = 1;
|
matthiasm@1
|
804 // d8.hasKnownExtents = false;
|
matthiasm@1
|
805 // d8.isQuantized = false;
|
matthiasm@1
|
806 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
807 // d8.hasDuration = false;
|
matthiasm@1
|
808 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
809 // list.push_back(d8);
|
matthiasm@1
|
810 //
|
matthiasm@1
|
811 // OutputDescriptor d9;
|
matthiasm@1
|
812 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
813 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
814 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
815 // d9.unit = "";
|
matthiasm@1
|
816 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
817 // d9.binCount = 0;
|
matthiasm@1
|
818 // d9.hasKnownExtents = true;
|
matthiasm@1
|
819 // d9.minValue = 0.1;
|
matthiasm@1
|
820 // d9.maxValue = 0.9;
|
matthiasm@1
|
821 // d9.isQuantized = false;
|
matthiasm@1
|
822 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
823 // d9.hasDuration = false;
|
matthiasm@1
|
824 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
825 // list.push_back(d9);
|
matthiasm@1
|
826 //
|
matthiasm@1
|
827 OutputDescriptor d10;
|
matthiasm@1
|
828 d10.identifier = "localtuning";
|
matthiasm@1
|
829 d10.name = "Local tuning";
|
matthiasm@4
|
830 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@1
|
831 d10.unit = "Hz";
|
matthiasm@1
|
832 d10.hasFixedBinCount = true;
|
matthiasm@1
|
833 d10.binCount = 1;
|
matthiasm@1
|
834 d10.hasKnownExtents = true;
|
matthiasm@1
|
835 d10.minValue = 427.47;
|
matthiasm@1
|
836 d10.maxValue = 452.89;
|
matthiasm@1
|
837 d10.isQuantized = false;
|
matthiasm@3
|
838 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
839 d10.hasDuration = false;
|
matthiasm@3
|
840 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
841 list.push_back(d10);
|
matthiasm@1
|
842
|
matthiasm@0
|
843 return list;
|
matthiasm@0
|
844 }
|
matthiasm@0
|
845
|
matthiasm@0
|
846
|
matthiasm@0
|
847 bool
|
matthiasm@0
|
848 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
849 {
|
matthiasm@1
|
850 if (debug_on) {
|
matthiasm@1
|
851 cerr << "--> initialise";
|
matthiasm@1
|
852 }
|
matthiasm@1
|
853
|
matthiasm@0
|
854 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
855 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
856 m_blockSize = blockSize;
|
matthiasm@0
|
857 m_stepSize = stepSize;
|
matthiasm@0
|
858 frameCount = 0;
|
matthiasm@0
|
859 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
860 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
861 float *tempkernel;
|
matthiasm@1
|
862
|
matthiasm@1
|
863 tempkernel = new float[tempn];
|
matthiasm@1
|
864
|
matthiasm@0
|
865 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
866 m_kernelValue.clear();
|
matthiasm@1
|
867 m_kernelFftIndex.clear();
|
matthiasm@1
|
868 m_kernelNoteIndex.clear();
|
matthiasm@1
|
869 int countNonzero = 0;
|
matthiasm@0
|
870 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
871 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
872 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
873 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
874 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
875 countNonzero++;
|
matthiasm@0
|
876 }
|
matthiasm@1
|
877 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
878 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
879 }
|
matthiasm@0
|
880 }
|
matthiasm@1
|
881 }
|
matthiasm@4
|
882 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
883 delete [] tempkernel;
|
matthiasm@3
|
884 ofstream myfile;
|
matthiasm@3
|
885 myfile.open ("matrix.txt");
|
matthiasm@3
|
886 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
887 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
888 myfile << m_dict[i] << endl;
|
matthiasm@3
|
889 }
|
matthiasm@3
|
890 myfile.close();
|
matthiasm@0
|
891 return true;
|
matthiasm@0
|
892 }
|
matthiasm@0
|
893
|
matthiasm@0
|
894 void
|
matthiasm@0
|
895 NNLSChroma::reset()
|
matthiasm@0
|
896 {
|
matthiasm@4
|
897 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
898
|
matthiasm@0
|
899 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
900 frameCount = 0;
|
matthiasm@4
|
901 m_dictID = 0;
|
matthiasm@4
|
902 m_fl.clear();
|
matthiasm@4
|
903 m_meanTuning0 = 0;
|
matthiasm@4
|
904 m_meanTuning1 = 0;
|
matthiasm@4
|
905 m_meanTuning2 = 0;
|
matthiasm@4
|
906 m_localTuning0 = 0;
|
matthiasm@4
|
907 m_localTuning1 = 0;
|
matthiasm@4
|
908 m_localTuning2 = 0;
|
matthiasm@4
|
909 m_localTuning.clear();
|
matthiasm@0
|
910 }
|
matthiasm@0
|
911
|
matthiasm@0
|
912 NNLSChroma::FeatureSet
|
matthiasm@0
|
913 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
914 {
|
matthiasm@4
|
915 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
916 frameCount++;
|
matthiasm@0
|
917 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
918
|
matthiasm@0
|
919 Feature f10; // local tuning
|
matthiasm@3
|
920 f10.hasTimestamp = true;
|
matthiasm@4
|
921 f10.timestamp = timestamp;
|
matthiasm@0
|
922 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
923
|
matthiasm@0
|
924 // make magnitude
|
matthiasm@0
|
925 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
926 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
927 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@0
|
928 }
|
matthiasm@4
|
929
|
matthiasm@0
|
930 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
931 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
932 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
933 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
934 }
|
matthiasm@0
|
935 int binCount = 0;
|
matthiasm@0
|
936 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
937 // cerr << ".";
|
matthiasm@1
|
938 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
939 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
940 binCount++;
|
matthiasm@0
|
941 }
|
matthiasm@1
|
942 // cerr << nm[20];
|
matthiasm@1
|
943 // cerr << endl;
|
matthiasm@0
|
944
|
matthiasm@0
|
945
|
matthiasm@0
|
946 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
947 // update means of complex tuning variables
|
matthiasm@0
|
948 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
949 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
950 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
951
|
matthiasm@0
|
952 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
953 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
954 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
955 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
956 float ratioOld = 0.997;
|
matthiasm@3
|
957 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
958 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
959 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
960 }
|
matthiasm@0
|
961
|
matthiasm@0
|
962 // if (m_tuneLocal) {
|
matthiasm@0
|
963 // local tuning
|
matthiasm@0
|
964 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
965 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
966 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
967 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
968 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
969 f10.values.push_back(tuning440);
|
matthiasm@3
|
970 // cerr << tuning440 << endl;
|
matthiasm@0
|
971 // }
|
matthiasm@0
|
972
|
matthiasm@0
|
973 Feature f1; // logfreqspec
|
matthiasm@0
|
974 f1.hasTimestamp = true;
|
matthiasm@0
|
975 f1.timestamp = timestamp;
|
matthiasm@0
|
976 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
977 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
978 }
|
matthiasm@0
|
979
|
matthiasm@0
|
980 FeatureSet fs;
|
matthiasm@0
|
981 fs[1].push_back(f1);
|
matthiasm@3
|
982 fs[8].push_back(f10);
|
matthiasm@0
|
983
|
matthiasm@0
|
984 // deletes
|
matthiasm@0
|
985 delete[] magnitude;
|
matthiasm@0
|
986 delete[] nm;
|
matthiasm@0
|
987
|
matthiasm@0
|
988 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@7
|
989 char * pPath;
|
matthiasm@7
|
990 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
991
|
matthiasm@7
|
992
|
matthiasm@0
|
993 return fs;
|
matthiasm@0
|
994 }
|
matthiasm@0
|
995
|
matthiasm@0
|
996 NNLSChroma::FeatureSet
|
matthiasm@0
|
997 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
998 {
|
matthiasm@4
|
999 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
1000 FeatureSet fsOut;
|
matthiasm@4
|
1001 if (m_fl.size() == 0) return fsOut;
|
matthiasm@0
|
1002 //
|
matthiasm@1
|
1003 /** Calculate Tuning
|
matthiasm@1
|
1004 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
1005 cumulative mean real and imag values)
|
matthiasm@1
|
1006 **/
|
matthiasm@1
|
1007 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
1008 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
1009 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
1010 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
1011 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
1012 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1013
|
matthiasm@1
|
1014 char buffer0 [50];
|
matthiasm@1
|
1015
|
matthiasm@1
|
1016 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1017
|
matthiasm@1
|
1018 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1019
|
matthiasm@1
|
1020 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
1021 Feature f0; // tuning
|
matthiasm@1
|
1022 f0.hasTimestamp = true;
|
matthiasm@1
|
1023 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
1024 f0.label = buffer0;
|
matthiasm@1
|
1025 fsOut[0].push_back(f0);
|
matthiasm@1
|
1026
|
matthiasm@1
|
1027 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
1028 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
1029 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
1030 **/
|
matthiasm@1
|
1031
|
matthiasm@1
|
1032 float tempValue = 0;
|
matthiasm@1
|
1033 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
1034 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
1035 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
1036 int count = 0;
|
matthiasm@1
|
1037
|
matthiasm@1
|
1038 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
1039 Feature f1 = *i;
|
matthiasm@1
|
1040 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
1041 f2.hasTimestamp = true;
|
matthiasm@1
|
1042 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
1043 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1044
|
matthiasm@1
|
1045 if (m_tuneLocal) {
|
matthiasm@1
|
1046 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
1047 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1048 }
|
matthiasm@1
|
1049
|
matthiasm@1
|
1050 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1051
|
matthiasm@4
|
1052 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
1053 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
1054 f2.values.push_back(tempValue);
|
matthiasm@1
|
1055 }
|
matthiasm@1
|
1056
|
matthiasm@1
|
1057 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
1058 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
1059 vector<float> runningstd;
|
matthiasm@1
|
1060 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
1061 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
1062 }
|
matthiasm@1
|
1063 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
1064 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
1065 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
1066 if (runningstd[i] > 0) {
|
matthiasm@1
|
1067 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
1068 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1069 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
1070 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1071 }
|
matthiasm@1
|
1072 if (f2.values[i] < 0) {
|
matthiasm@1
|
1073 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
1074 }
|
matthiasm@1
|
1075 }
|
matthiasm@1
|
1076 fsOut[2].push_back(f2);
|
matthiasm@1
|
1077 count++;
|
matthiasm@1
|
1078 }
|
matthiasm@1
|
1079
|
matthiasm@1
|
1080 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
1081 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
1082 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
1083 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
1084 bass and treble stacked onto each other).
|
matthiasm@1
|
1085 **/
|
matthiasm@1
|
1086 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
1087
|
matthiasm@1
|
1088 vector<vector<float> > chordogram;
|
matthiasm@3
|
1089 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
1090 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
1091 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
1092 count = 0;
|
matthiasm@1
|
1093
|
matthiasm@1
|
1094 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
1095 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
1096 Feature f3; // semitone spectrum
|
matthiasm@1
|
1097 Feature f4; // treble chromagram
|
matthiasm@1
|
1098 Feature f5; // bass chromagram
|
matthiasm@1
|
1099 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1100
|
matthiasm@1
|
1101 f3.hasTimestamp = true;
|
matthiasm@1
|
1102 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1103
|
matthiasm@1
|
1104 f4.hasTimestamp = true;
|
matthiasm@1
|
1105 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1106
|
matthiasm@1
|
1107 f5.hasTimestamp = true;
|
matthiasm@1
|
1108 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1109
|
matthiasm@1
|
1110 f6.hasTimestamp = true;
|
matthiasm@1
|
1111 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1112
|
matthiasm@3
|
1113 float b[256];
|
matthiasm@1
|
1114
|
matthiasm@1
|
1115 bool some_b_greater_zero = false;
|
matthiasm@3
|
1116 float sumb = 0;
|
matthiasm@1
|
1117 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
1118 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
1119 b[i] = f2.values[i];
|
matthiasm@3
|
1120 sumb += b[i];
|
matthiasm@1
|
1121 if (b[i] > 0) {
|
matthiasm@1
|
1122 some_b_greater_zero = true;
|
matthiasm@1
|
1123 }
|
matthiasm@1
|
1124 }
|
matthiasm@1
|
1125
|
matthiasm@1
|
1126 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1127
|
matthiasm@1
|
1128 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
1129 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
1130 float currval;
|
matthiasm@1
|
1131 unsigned iSemitone = 0;
|
matthiasm@1
|
1132
|
matthiasm@1
|
1133 if (some_b_greater_zero) {
|
matthiasm@3
|
1134 if (m_dictID == 1) {
|
matthiasm@1
|
1135 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
1136 currval = 0;
|
matthiasm@3
|
1137 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
1138 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
1139 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
1140 f3.values.push_back(currval);
|
matthiasm@1
|
1141 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
1142 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
1143 iSemitone++;
|
matthiasm@1
|
1144 }
|
matthiasm@1
|
1145
|
matthiasm@1
|
1146 } else {
|
matthiasm@3
|
1147 float x[84+1000];
|
matthiasm@3
|
1148 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@3
|
1149 // for (int i = 0; i < 84; ++i) {
|
matthiasm@3
|
1150 // x[i] = b[3*i+3];
|
matthiasm@3
|
1151 // }
|
matthiasm@3
|
1152 float rnorm;
|
matthiasm@3
|
1153 float w[84+1000];
|
matthiasm@3
|
1154 float zz[84+1000];
|
matthiasm@3
|
1155 int indx[84+1000];
|
matthiasm@1
|
1156 int mode;
|
matthiasm@3
|
1157 float curr_dict[256*84];
|
matthiasm@3
|
1158 for (unsigned i = 0; i < 256 * 84; ++i) {
|
matthiasm@3
|
1159 curr_dict[i] = 1.0 * m_dict[i];
|
matthiasm@3
|
1160 }
|
matthiasm@3
|
1161 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@3
|
1162 for (unsigned iNote = 0; iNote < 84; ++iNote) {
|
matthiasm@3
|
1163 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
|
matthiasm@3
|
1164 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
|
matthiasm@3
|
1165 // }
|
matthiasm@3
|
1166 f3.values.push_back(x[iNote]);
|
matthiasm@3
|
1167 // cerr << mode << endl;
|
matthiasm@3
|
1168 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
|
matthiasm@3
|
1169 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
|
matthiasm@3
|
1170 // iSemitone++;
|
matthiasm@3
|
1171 }
|
matthiasm@1
|
1172 }
|
matthiasm@1
|
1173 }
|
matthiasm@1
|
1174
|
matthiasm@1
|
1175 f4.values = chroma;
|
matthiasm@1
|
1176 f5.values = basschroma;
|
matthiasm@1
|
1177 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
1178 f6.values = chroma;
|
matthiasm@1
|
1179
|
matthiasm@1
|
1180 // local chord estimation
|
matthiasm@1
|
1181 vector<float> currentChordSalience;
|
matthiasm@1
|
1182 float tempchordvalue = 0;
|
matthiasm@1
|
1183 float sumchordvalue = 0;
|
matthiasm@1
|
1184 int nChord = nChorddict / 24;
|
matthiasm@1
|
1185 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1186 tempchordvalue = 0;
|
matthiasm@1
|
1187 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@1
|
1188 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1189 }
|
matthiasm@1
|
1190 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@1
|
1191 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1192 }
|
matthiasm@1
|
1193 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1194 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1195 }
|
matthiasm@1
|
1196 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1197 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1198 }
|
matthiasm@1
|
1199 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1200
|
matthiasm@1
|
1201 fsOut[3].push_back(f3);
|
matthiasm@1
|
1202 fsOut[4].push_back(f4);
|
matthiasm@1
|
1203 fsOut[5].push_back(f5);
|
matthiasm@1
|
1204 fsOut[6].push_back(f6);
|
matthiasm@1
|
1205 count++;
|
matthiasm@1
|
1206 }
|
matthiasm@0
|
1207 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
|
matthiasm@0
|
1208 //
|
matthiasm@3
|
1209 /* Simple chord estimation
|
matthiasm@3
|
1210 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1211 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1212 */
|
matthiasm@3
|
1213 count = 0;
|
matthiasm@3
|
1214 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1215 int nChord = nChorddict / 24;
|
matthiasm@3
|
1216 vector<int> chordSequence;
|
matthiasm@3
|
1217 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1218 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1219 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1220 }
|
matthiasm@4
|
1221 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1222 int startIndex = count + 1;
|
matthiasm@3
|
1223 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@3
|
1224 vector<float> temp = vector<float>(nChord,0);
|
matthiasm@3
|
1225 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@4
|
1226 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1227 unsigned bestchordL = 0; // index of the best "left" chord
|
matthiasm@3
|
1228 unsigned bestchordR = 0; // index of the best "right" chord
|
matthiasm@4
|
1229 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1230 // now find the max values on both sides of iWF
|
matthiasm@3
|
1231 // left side:
|
matthiasm@3
|
1232 float maxL = 0;
|
matthiasm@3
|
1233 unsigned maxindL = nChord-1;
|
matthiasm@3
|
1234 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1235 float currsum = 0;
|
matthiasm@3
|
1236 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1237 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1238 }
|
matthiasm@3
|
1239 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1240 if (currsum > maxL) {
|
matthiasm@3
|
1241 maxL = currsum;
|
matthiasm@3
|
1242 maxindL = iChord;
|
matthiasm@3
|
1243 }
|
matthiasm@3
|
1244 }
|
matthiasm@3
|
1245 // right side:
|
matthiasm@3
|
1246 float maxR = 0;
|
matthiasm@3
|
1247 unsigned maxindR = nChord-1;
|
matthiasm@3
|
1248 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1249 float currsum = 0;
|
matthiasm@3
|
1250 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1251 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1252 }
|
matthiasm@3
|
1253 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1254 if (currsum > maxR) {
|
matthiasm@3
|
1255 maxR = currsum;
|
matthiasm@3
|
1256 maxindR = iChord;
|
matthiasm@3
|
1257 }
|
matthiasm@3
|
1258 }
|
matthiasm@3
|
1259 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1260 maxval = maxL+maxR;
|
matthiasm@3
|
1261 maxindex = iWF;
|
matthiasm@3
|
1262 bestchordL = maxindL;
|
matthiasm@3
|
1263 bestchordR = maxindR;
|
matthiasm@3
|
1264 }
|
matthiasm@3
|
1265
|
matthiasm@3
|
1266 }
|
matthiasm@3
|
1267 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1268 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1269 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1270 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1271 }
|
matthiasm@3
|
1272 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1273 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1274 }
|
matthiasm@3
|
1275 count++;
|
matthiasm@3
|
1276 }
|
matthiasm@3
|
1277
|
matthiasm@3
|
1278 count = 0;
|
matthiasm@3
|
1279 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1280 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1281 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1282 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1283 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1284 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1285 maxindex = iChord;
|
matthiasm@4
|
1286 // cerr << iChord << endl;
|
matthiasm@3
|
1287 }
|
matthiasm@3
|
1288 }
|
matthiasm@3
|
1289 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1290 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1291 count++;
|
matthiasm@3
|
1292 }
|
matthiasm@3
|
1293
|
matthiasm@3
|
1294
|
matthiasm@3
|
1295 // mode filter on chordSequence
|
matthiasm@3
|
1296 count = 0;
|
matthiasm@3
|
1297 int oldChordIndex = -1;
|
matthiasm@3
|
1298 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1299 Feature f6 = *it;
|
matthiasm@3
|
1300 Feature f7; // chord estimate
|
matthiasm@3
|
1301 f7.hasTimestamp = true;
|
matthiasm@3
|
1302 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1303 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1304 int maxChordCount = 0;
|
matthiasm@3
|
1305 int maxChordIndex = nChord-1;
|
matthiasm@4
|
1306 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1307 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1308 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1309 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1310 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@7
|
1311 // cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1312 maxChordCount++;
|
matthiasm@4
|
1313 maxChordIndex = chordSequence[i];
|
matthiasm@4
|
1314 }
|
matthiasm@4
|
1315 }
|
matthiasm@4
|
1316 // chordSequence[count] = maxChordIndex;
|
matthiasm@7
|
1317 // cerr << maxChordIndex << endl;
|
matthiasm@3
|
1318 if (oldChordIndex != maxChordIndex) {
|
matthiasm@3
|
1319 oldChordIndex = maxChordIndex;
|
matthiasm@3
|
1320
|
matthiasm@3
|
1321 char buffer1 [50];
|
matthiasm@3
|
1322 if (maxChordIndex < nChord - 1) {
|
matthiasm@3
|
1323 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@3
|
1324 } else {
|
matthiasm@3
|
1325 sprintf(buffer1, "N");
|
matthiasm@3
|
1326 }
|
matthiasm@3
|
1327 f7.label = buffer1;
|
matthiasm@3
|
1328 fsOut[7].push_back(f7);
|
matthiasm@3
|
1329 }
|
matthiasm@3
|
1330 count++;
|
matthiasm@3
|
1331 }
|
matthiasm@0
|
1332 // // musicity
|
matthiasm@0
|
1333 // count = 0;
|
matthiasm@0
|
1334 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1335 // vector<float> musicityValue;
|
matthiasm@0
|
1336 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1337 // Feature f4 = *it;
|
matthiasm@0
|
1338 //
|
matthiasm@0
|
1339 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1340 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1341 // float chromasum = 0;
|
matthiasm@0
|
1342 // float diffsum = 0;
|
matthiasm@0
|
1343 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1344 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1345 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1346 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1347 // }
|
matthiasm@0
|
1348 // }
|
matthiasm@0
|
1349 // diffsum /= chromasum;
|
matthiasm@0
|
1350 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1351 // count++;
|
matthiasm@0
|
1352 // }
|
matthiasm@0
|
1353 //
|
matthiasm@0
|
1354 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1355 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1356 // musicityThreshold = 0.74;
|
matthiasm@0
|
1357 // }
|
matthiasm@0
|
1358 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1359 // musicityThreshold = 0.77;
|
matthiasm@0
|
1360 // }
|
matthiasm@0
|
1361 //
|
matthiasm@0
|
1362 // count = 0;
|
matthiasm@0
|
1363 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1364 // Feature f4 = *it;
|
matthiasm@0
|
1365 // Feature f8; // musicity
|
matthiasm@0
|
1366 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1367 //
|
matthiasm@0
|
1368 // f8.hasTimestamp = true;
|
matthiasm@0
|
1369 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1370 // f9.hasTimestamp = true;
|
matthiasm@0
|
1371 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1372 //
|
matthiasm@0
|
1373 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1374 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1375 // int musicityCount = 0;
|
matthiasm@0
|
1376 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1377 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1378 // }
|
matthiasm@0
|
1379 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1380 //
|
matthiasm@0
|
1381 // if (isSpeech) {
|
matthiasm@0
|
1382 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1383 // f9.label = "Speech";
|
matthiasm@0
|
1384 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1385 // oldlabeltype = 2;
|
matthiasm@0
|
1386 // }
|
matthiasm@0
|
1387 // } else {
|
matthiasm@0
|
1388 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1389 // f9.label = "Music";
|
matthiasm@0
|
1390 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1391 // oldlabeltype = 1;
|
matthiasm@0
|
1392 // }
|
matthiasm@0
|
1393 // }
|
matthiasm@0
|
1394 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1395 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1396 // count++;
|
matthiasm@0
|
1397 // }
|
matthiasm@0
|
1398 return fsOut;
|
matthiasm@0
|
1399
|
matthiasm@0
|
1400 }
|
matthiasm@0
|
1401
|