comparison NNLSChroma.cpp @ 23:93c836cfb8c5 matthiasm-plugin

* Consistent indentation (spaces only)
author Chris Cannam
date Thu, 21 Oct 2010 12:12:23 +0100
parents 444c344681f3
children 690bd9148467
comparison
equal deleted inserted replaced
22:444c344681f3 23:93c836cfb8c5
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
1 2
2 #include "NNLSChroma.h" 3 #include "NNLSChroma.h"
3 #include <cmath> 4 #include <cmath>
4 // #include <omp.h> 5 // #include <omp.h>
5 #include <list> 6 #include <list>
28 const float cosvalue = -0.5; 29 const float cosvalue = -0.5;
29 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; 30 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
30 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; 31 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
31 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; 32 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
32 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", 33 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
33 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; 34 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
34 35
35 const char* bassnames[12][12] ={ 36 const char* bassnames[12][12] ={
36 {"A","","B","C","C#","D","","E","","F#","G","G#"}, 37 {"A","","B","C","C#","D","","E","","F#","G","G#"},
37 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"}, 38 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"},
38 {"B","","C#","D","D#","E","","F#","","G#","A","A#"}, 39 {"B","","C#","D","D#","E","","F#","","G#","A","A#"},
39 {"C","","D","Eb","E","F","","G","","A","Bb","B"}, 40 {"C","","D","Eb","E","F","","G","","A","Bb","B"},
40 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"}, 41 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"},
41 {"D","","E","F","F#","G","","A","","B","C","C#"}, 42 {"D","","E","F","F#","G","","A","","B","C","C#"},
42 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"}, 43 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"},
43 {"E","","F#","G","G#","A","","B","","C#","D","D#"}, 44 {"E","","F#","G","G#","A","","B","","C#","D","D#"},
44 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"}, 45 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"},
45 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"}, 46 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"},
46 {"G","","A","Bb","B","C","","D","","E","F","F#"}, 47 {"G","","A","Bb","B","C","","D","","E","F","F#"},
47 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"} 48 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"}
48 }; 49 };
49 50
50 51
51 // const char* bassnames[12][12] ={ 52 // const char* bassnames[12][12] ={
52 // {"1","","2","b3","3","4","","5","","6","b7","7"}, 53 // {"1","","2","b3","3","4","","5","","6","b7","7"},
65 66
66 const vector<float> hw(hammingwind, hammingwind+19); 67 const vector<float> hw(hammingwind, hammingwind+19);
67 const int nNote = 256; 68 const int nNote = 256;
68 69
69 /** Special Convolution 70 /** Special Convolution
70 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the 71 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
71 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values 72 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
72 as the first (last) valid convolution bin. 73 as the first (last) valid convolution bin.
73 **/ 74 **/
74 75
75 const bool debug_on = false; 76 const bool debug_on = false;
76 77
77 vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel) 78 vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel)
96 } 97 }
97 98
98 // fill upper and lower pads 99 // fill upper and lower pads
99 for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; 100 for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2];
100 for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = 101 for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] =
101 Z[lenConvolvee - lenKernel/2 - 1]; 102 Z[lenConvolvee - lenKernel/2 - 1];
102 return Z; 103 return Z;
103 } 104 }
104 105
105 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize) 106 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
106 // { 107 // {
111 // return freq; 112 // return freq;
112 // } 113 // }
113 114
/**
 * Raised-cosine pulse centred on `centre`.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   full width of the pulse support
 * @return cos(2*pi*(x - centre)/width)/2 + 1/2 when x lies within half a
 *         width of the centre (inclusive), and 0 everywhere else.
 */
float cospuls(float x, float centre, float width)
{
    float recipwidth = 1.0/width;
    // std::fabs rather than abs: an unqualified abs() may bind to the integer
    // overload, which truncates the distance to a whole number before the
    // comparison and lets points outside the window through.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return cos((x-centre)*2*M_PI*recipwidth)*.5+.5;
    }
    return 0.0;
}
122 123
123 float pitchCospuls(float x, float centre, int binsperoctave) 124 float pitchCospuls(float x, float centre, int binsperoctave)
124 { 125 {
125 float warpedf = -binsperoctave * (log2(centre) - log2(x)); 126 float warpedf = -binsperoctave * (log2(centre) - log2(x));
126 float out = cospuls(warpedf, 0.0, 2.0); 127 float out = cospuls(warpedf, 0.0, 2.0);
127 // now scale to correct for note density 128 // now scale to correct for note density
128 float c = log(2.0)/binsperoctave; 129 float c = log(2.0)/binsperoctave;
129 if (x > 0) { 130 if (x > 0) {
130 out = out / (c * x); 131 out = out / (c * x);
131 } else { 132 } else {
132 out = 0; 133 out = 0;
133 } 134 }
134 return out; 135 return out;
135 } 136 }
136 137
137 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { 138 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
138 139
139 int binspersemitone = 3; // this must be 3 140 int binspersemitone = 3; // this must be 3
140 int minoctave = 0; // this must be 0 141 int minoctave = 0; // this must be 0
141 int maxoctave = 7; // this must be 7 142 int maxoctave = 7; // this must be 7
142 int oversampling = 80; 143 int oversampling = 80;
143 144
144 // linear frequency vector 145 // linear frequency vector
145 vector<float> fft_f; 146 vector<float> fft_f;
146 for (int i = 0; i < blocksize/2; ++i) { 147 for (int i = 0; i < blocksize/2; ++i) {
147 fft_f.push_back(i * (fs * 1.0 / blocksize)); 148 fft_f.push_back(i * (fs * 1.0 / blocksize));
148 } 149 }
149 float fft_width = fs * 2.0 / blocksize; 150 float fft_width = fs * 2.0 / blocksize;
150 151
151 // linear oversampled frequency vector 152 // linear oversampled frequency vector
152 vector<float> oversampled_f; 153 vector<float> oversampled_f;
153 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { 154 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
154 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); 155 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
155 } 156 }
156 157
157 // pitch-spaced frequency vector 158 // pitch-spaced frequency vector
158 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! 159 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
159 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! 160 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
160 vector<float> cq_f; 161 vector<float> cq_f;
161 float oob = 1.0/binspersemitone; // one over binspersemitone 162 float oob = 1.0/binspersemitone; // one over binspersemitone
162 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); 164 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
164 for (int i = minMIDI + 1; i < maxMIDI; ++i) { 165 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
165 for (int k = -1; k < 2; ++k) { 166 for (int k = -1; k < 2; ++k) {
166 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); 167 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
167 } 168 }
168 } 169 }
169 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); 170 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
170 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); 171 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
171 172
172 int nFFT = fft_f.size(); 173 int nFFT = fft_f.size();
173 174
174 vector<float> fft_activation; 175 vector<float> fft_activation;
175 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { 176 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
176 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); 177 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
177 fft_activation.push_back(cosp); 178 fft_activation.push_back(cosp);
178 // cerr << cosp << endl; 179 // cerr << cosp << endl;
179 } 180 }
180 181
181 float cq_activation; 182 float cq_activation;
182 for (int iFFT = 1; iFFT < nFFT; ++iFFT) { 183 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
183 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) 184 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
184 int curr_start = oversampling * iFFT - oversampling; 185 int curr_start = oversampling * iFFT - oversampling;
185 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here 186 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
186 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; 187 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
187 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { 188 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
188 outmatrix[iFFT + nFFT * iCQ] = 0; 189 outmatrix[iFFT + nFFT * iCQ] = 0;
189 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood 190 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
190 for (int iOS = curr_start; iOS < curr_end; ++iOS) { 191 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
191 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); 192 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
192 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; 193 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
193 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; 194 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
194 } 195 }
195 // if (iCQ == 1 || iCQ == 2) { 196 // if (iCQ == 1 || iCQ == 2) {
196 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; 197 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
197 // } 198 // }
198 } 199 }
199 } 200 }
200 } 201 }
201 return true; 202 return true;
202 } 203 }
203 204
204 void dictionaryMatrix(float* dm) { 205 void dictionaryMatrix(float* dm) {
205 int binspersemitone = 3; // this must be 3 206 int binspersemitone = 3; // this must be 3
206 int minoctave = 0; // this must be 0 207 int minoctave = 0; // this must be 0
207 int maxoctave = 7; // this must be 7 208 int maxoctave = 7; // this must be 7
208 float s_param = 0.7; 209 float s_param = 0.7;
209 210
210 // pitch-spaced frequency vector 211 // pitch-spaced frequency vector
211 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! 212 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
212 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! 213 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
213 vector<float> cq_f; 214 vector<float> cq_f;
214 float oob = 1.0/binspersemitone; // one over binspersemitone 215 float oob = 1.0/binspersemitone; // one over binspersemitone
215 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); 217 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
217 for (int i = minMIDI + 1; i < maxMIDI; ++i) { 218 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
218 for (int k = -1; k < 2; ++k) { 219 for (int k = -1; k < 2; ++k) {
219 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); 220 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
220 } 221 }
221 } 222 }
222 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); 223 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
223 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); 224 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
224 225
225 float curr_f; 226 float curr_f;
226 float floatbin; 227 float floatbin;
227 float curr_amp; 228 float curr_amp;
228 // now for every combination calculate the matrix element 229 // now for every combination calculate the matrix element
229 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { 230 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
230 // cerr << iOut << endl; 231 // cerr << iOut << endl;
231 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { 232 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
232 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; 233 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
233 // if (curr_f > cq_f[nNote-1]) break; 234 // if (curr_f > cq_f[nNote-1]) break;
234 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); 235 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
235 // cerr << floatbin << endl; 236 // cerr << floatbin << endl;
236 curr_amp = pow(s_param,float(iHarm-1)); 237 curr_amp = pow(s_param,float(iHarm-1));
237 // cerr << "curramp" << curr_amp << endl; 238 // cerr << "curramp" << curr_amp << endl;
238 for (unsigned iNote = 0; iNote < nNote; ++iNote) { 239 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
239 if (abs(iNote+1.0-floatbin)<2) { 240 if (abs(iNote+1.0-floatbin)<2) {
240 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp; 241 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
241 // dm[iNote + nNote * iOut] += 1 * curr_amp; 242 // dm[iNote + nNote * iOut] += 1 * curr_amp;
242 } 243 }
243 } 244 }
244 } 245 }
245 } 246 }
246 247
247 248
248 } 249 }
249 250
/**
 * Look up an environment variable.
 *
 * @param key  name of the variable
 * @return the variable's value, or an empty string when it is not set.
 */
string get_env_var( std::string const & key ) {
    const char *raw = getenv( key.c_str() );
    if (raw == NULL) {
        return string();
    }
    return string(raw);
}
259 260
260 261
261 vector<string> chordDictionary(vector<float> *mchorddict) { 262 vector<string> chordDictionary(vector<float> *mchorddict) {
262 // ifstream chordDictFile; 263 // ifstream chordDictFile;
263 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict"); 264 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
264 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n"; 265 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
265 typedef tokenizer<char_separator<char> > Tok; 266 typedef tokenizer<char_separator<char> > Tok;
266 // char_separator<char> sep; // default constructed 267 // char_separator<char> sep; // default constructed
267 char_separator<char> sep(",; ","="); 268 char_separator<char> sep(",; ","=");
268 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str()); 269 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
269 string line; 270 string line;
270 int iElement = 0; 271 int iElement = 0;
271 int nChord = 0; 272 int nChord = 0;
272 273
273 vector<string> loadedChordNames; 274 vector<string> loadedChordNames;
274 vector<float> loadedChordDict; 275 vector<float> loadedChordDict;
275 if (chordDictFile.is_open()) { 276 if (chordDictFile.is_open()) {
276 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file 277 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
277 // first, get the chord definition 278 // first, get the chord definition
278 string chordType; 279 string chordType;
279 vector<float> tempPCVector; 280 vector<float> tempPCVector;
280 // cerr << line << endl; 281 // cerr << line << endl;
281 if (!line.empty() && line.substr(0,1) != "#") { 282 if (!line.empty() && line.substr(0,1) != "#") {
282 Tok tok(line, sep); 283 Tok tok(line, sep);
283 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements 284 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
284 string tempString = *tok_iter; 285 string tempString = *tok_iter;
285 // cerr << tempString << endl; 286 // cerr << tempString << endl;
286 if (tok_iter == tok.begin()) { // either the chord name or a colon 287 if (tok_iter == tok.begin()) { // either the chord name or a colon
287 if (tempString == "=") { 288 if (tempString == "=") {
288 chordType = ""; 289 chordType = "";
289 } else { 290 } else {
290 chordType = tempString; 291 chordType = tempString;
291 tok_iter++; // is this cheating ? :) 292 tok_iter++; // is this cheating ? :)
292 } 293 }
293 } else { 294 } else {
294 tempPCVector.push_back(lexical_cast<float>(*tok_iter)); 295 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
295 } 296 }
296 } 297 }
297 298
298 // now make all 12 chords of every type 299 // now make all 12 chords of every type
299 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) { 300 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
300 // add bass slash notation 301 // add bass slash notation
301 string slashNotation = ""; 302 string slashNotation = "";
302 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) { 303 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
303 if (tempPCVector[(kSemitone) % 12] > 0.99) { 304 if (tempPCVector[(kSemitone) % 12] > 0.99) {
304 slashNotation = bassnames[iSemitone][kSemitone]; 305 slashNotation = bassnames[iSemitone][kSemitone];
305 } 306 }
306 } 307 }
307 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes 308 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
308 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl; 309 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
309 float bassValue = 0; 310 float bassValue = 0;
310 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) { 311 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
311 bassValue = 1; 312 bassValue = 1;
312 } else { 313 } else {
313 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5; 314 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
314 } 315 }
315 loadedChordDict.push_back(bassValue); 316 loadedChordDict.push_back(bassValue);
316 } 317 }
317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes 318 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
318 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]); 319 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
319 } 320 }
320 ostringstream os; 321 ostringstream os;
321 if (slashNotation.empty()) { 322 if (slashNotation.empty()) {
322 os << notenames[12+iSemitone] << chordType; 323 os << notenames[12+iSemitone] << chordType;
323 } else { 324 } else {
324 os << notenames[12+iSemitone] << chordType << "/" << slashNotation; 325 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
325 } 326 }
326 // cerr << os.str() << endl; 327 // cerr << os.str() << endl;
327 loadedChordNames.push_back(os.str()); 328 loadedChordNames.push_back(os.str());
328 } 329 }
329 } 330 }
330 } 331 }
331 // N type 332 // N type
332 loadedChordNames.push_back("N"); 333 loadedChordNames.push_back("N");
333 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5); 334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0); 335 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
335 336
336 // normalise 337 // normalise
337 float sum = 0; 338 float sum = 0;
338 for (int i = 0; i < loadedChordDict.size(); i++) { 339 for (int i = 0; i < loadedChordDict.size(); i++) {
339 sum += pow(loadedChordDict[i],2); 340 sum += pow(loadedChordDict[i],2);
340 if (i % 24 == 23) { 341 if (i % 24 == 23) {
341 float invertedsum = 1.0/sqrt(sum); 342 float invertedsum = 1.0/sqrt(sum);
342 for (int k = 0; k < 24; k++) { 343 for (int k = 0; k < 24; k++) {
343 loadedChordDict[i-k] *= invertedsum; 344 loadedChordDict[i-k] *= invertedsum;
344 } 345 }
345 sum = 0; 346 sum = 0;
346 } 347 }
347 348
348 } 349 }
349 350
350 351
351 nChord = 0; 352 nChord = 0;
352 for (int i = 0; i < loadedChordNames.size(); i++) { 353 for (int i = 0; i < loadedChordNames.size(); i++) {
353 nChord++; 354 nChord++;
354 } 355 }
355 chordDictFile.close(); 356 chordDictFile.close();
356 357
357 358
358 // mchorddict = new float[nChord*24]; 359 // mchorddict = new float[nChord*24];
359 for (int i = 0; i < nChord*24; i++) { 360 for (int i = 0; i < nChord*24; i++) {
360 mchorddict->push_back(loadedChordDict[i]); 361 mchorddict->push_back(loadedChordDict[i]);
361 } 362 }
362 363
363 } else {// use default from chorddict.cpp 364 } else {// use default from chorddict.cpp
364 // mchorddict = new float[nChorddict]; 365 // mchorddict = new float[nChorddict];
365 for (int i = 0; i < nChorddict; i++) { 366 for (int i = 0; i < nChorddict; i++) {
366 mchorddict->push_back(chorddict[i]); 367 mchorddict->push_back(chorddict[i]);
367 } 368 }
368 369
369 nChord = nChorddict/24; 370 nChord = nChorddict/24;
370 // mchordnames = new string[nChorddict/24]; 371 // mchordnames = new string[nChorddict/24];
371 char buffer1 [50]; 372 char buffer1 [50];
372 for (int i = 0; i < nChorddict/24; i++) { 373 for (int i = 0; i < nChorddict/24; i++) {
373 if (i < nChorddict/24 - 1) { 374 if (i < nChorddict/24 - 1) {
374 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]); 375 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
375 } else { 376 } else {
376 sprintf(buffer1, "N"); 377 sprintf(buffer1, "N");
377 } 378 }
378 ostringstream os; 379 ostringstream os;
379 os << buffer1; 380 os << buffer1;
380 loadedChordNames.push_back(os.str()); 381 loadedChordNames.push_back(os.str());
381 382
382 } 383 }
383 384
384 } 385 }
385 // cerr << "before leaving" << chordnames[1] << endl; 386 // cerr << "before leaving" << chordnames[1] << endl;
386 return loadedChordNames; 387 return loadedChordNames;
387 } 388 }
388 389
389 NNLSChroma::NNLSChroma(float inputSampleRate) : 390 NNLSChroma::NNLSChroma(float inputSampleRate) :
390 Plugin(inputSampleRate), 391 Plugin(inputSampleRate),
391 m_fl(0), 392 m_fl(0),
392 m_blockSize(0), 393 m_blockSize(0),
393 m_stepSize(0), 394 m_stepSize(0),
394 m_lengthOfNoteIndex(0), 395 m_lengthOfNoteIndex(0),
395 m_meanTuning0(0), 396 m_meanTuning0(0),
396 m_meanTuning1(0), 397 m_meanTuning1(0),
397 m_meanTuning2(0), 398 m_meanTuning2(0),
398 m_localTuning0(0), 399 m_localTuning0(0),
399 m_localTuning1(0), 400 m_localTuning1(0),
400 m_localTuning2(0), 401 m_localTuning2(0),
401 m_paling(1.0), 402 m_paling(1.0),
402 m_preset(0.0), 403 m_preset(0.0),
403 m_localTuning(0), 404 m_localTuning(0),
404 m_kernelValue(0), 405 m_kernelValue(0),
405 m_kernelFftIndex(0), 406 m_kernelFftIndex(0),
406 m_kernelNoteIndex(0), 407 m_kernelNoteIndex(0),
407 m_dict(0), 408 m_dict(0),
408 m_tuneLocal(false), 409 m_tuneLocal(false),
409 m_dictID(0), 410 m_dictID(0),
410 m_chorddict(0), 411 m_chorddict(0),
411 m_chordnames(0), 412 m_chordnames(0),
412 m_doNormalizeChroma(0), 413 m_doNormalizeChroma(0),
413 m_rollon(0.01) 414 m_rollon(0.01)
414 { 415 {
415 if (debug_on) cerr << "--> NNLSChroma" << endl; 416 if (debug_on) cerr << "--> NNLSChroma" << endl;
416 417
417 // make the *note* dictionary matrix 418 // make the *note* dictionary matrix
418 m_dict = new float[nNote * 84]; 419 m_dict = new float[nNote * 84];
419 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; 420 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
420 dictionaryMatrix(m_dict); 421 dictionaryMatrix(m_dict);
421 422
422 // get the *chord* dictionary from file (if the file exists) 423 // get the *chord* dictionary from file (if the file exists)
423 m_chordnames = chordDictionary(&m_chorddict); 424 m_chordnames = chordDictionary(&m_chorddict);
424 } 425 }
425 426
426 427
427 NNLSChroma::~NNLSChroma() 428 NNLSChroma::~NNLSChroma()
428 { 429 {
429 if (debug_on) cerr << "--> ~NNLSChroma" << endl; 430 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
430 delete [] m_dict; 431 delete [] m_dict;
431 // delete [] m_chorddict; 432 // delete [] m_chorddict;
432 // delete m_chordnames; 433 // delete m_chordnames;
433 } 434 }
434 435
435 string 436 string
436 NNLSChroma::getIdentifier() const 437 NNLSChroma::getIdentifier() const
437 { 438 {
438 if (debug_on) cerr << "--> getIdentifier" << endl; 439 if (debug_on) cerr << "--> getIdentifier" << endl;
439 return "nnls_chroma"; 440 return "nnls_chroma";
440 } 441 }
441 442
442 string 443 string
443 NNLSChroma::getName() const 444 NNLSChroma::getName() const
444 { 445 {
445 if (debug_on) cerr << "--> getName" << endl; 446 if (debug_on) cerr << "--> getName" << endl;
446 return "NNLS Chroma"; 447 return "NNLS Chroma";
447 } 448 }
448 449
449 string 450 string
450 NNLSChroma::getDescription() const 451 NNLSChroma::getDescription() const
451 { 452 {
452 // Return something helpful here! 453 // Return something helpful here!
453 if (debug_on) cerr << "--> getDescription" << endl; 454 if (debug_on) cerr << "--> getDescription" << endl;
454 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; 455 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
455 } 456 }
456 457
457 string 458 string
458 NNLSChroma::getMaker() const 459 NNLSChroma::getMaker() const
459 { 460 {
460 if (debug_on) cerr << "--> getMaker" << endl; 461 if (debug_on) cerr << "--> getMaker" << endl;
461 // Your name here 462 // Your name here
462 return "Matthias Mauch"; 463 return "Matthias Mauch";
463 } 464 }
464 465
465 int 466 int
466 NNLSChroma::getPluginVersion() const 467 NNLSChroma::getPluginVersion() const
467 { 468 {
468 if (debug_on) cerr << "--> getPluginVersion" << endl; 469 if (debug_on) cerr << "--> getPluginVersion" << endl;
469 // Increment this each time you release a version that behaves 470 // Increment this each time you release a version that behaves
470 // differently from the previous one 471 // differently from the previous one
471 return 1; 472 return 1;
472 } 473 }
473 474
474 string 475 string
475 NNLSChroma::getCopyright() const 476 NNLSChroma::getCopyright() const
476 { 477 {
477 if (debug_on) cerr << "--> getCopyright" << endl; 478 if (debug_on) cerr << "--> getCopyright" << endl;
478 // This function is not ideally named. It does not necessarily 479 // This function is not ideally named. It does not necessarily
479 // need to say who made the plugin -- getMaker does that -- but it 480 // need to say who made the plugin -- getMaker does that -- but it
480 // should indicate the terms under which it is distributed. For 481 // should indicate the terms under which it is distributed. For
481 // example, "Copyright (year). All Rights Reserved", or "GPL" 482 // example, "Copyright (year). All Rights Reserved", or "GPL"
482 return "Copyright (2010). All rights reserved."; 483 return "Copyright (2010). All rights reserved.";
483 } 484 }
484 485
485 NNLSChroma::InputDomain 486 NNLSChroma::InputDomain
486 NNLSChroma::getInputDomain() const 487 NNLSChroma::getInputDomain() const
487 { 488 {
488 if (debug_on) cerr << "--> getInputDomain" << endl; 489 if (debug_on) cerr << "--> getInputDomain" << endl;
489 return FrequencyDomain; 490 return FrequencyDomain;
490 } 491 }
491 492
492 size_t 493 size_t
493 NNLSChroma::getPreferredBlockSize() const 494 NNLSChroma::getPreferredBlockSize() const
494 { 495 {
495 if (debug_on) cerr << "--> getPreferredBlockSize" << endl; 496 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
496 return 16384; // 0 means "I can handle any block size" 497 return 16384; // 0 means "I can handle any block size"
497 } 498 }
498 499
499 size_t 500 size_t
500 NNLSChroma::getPreferredStepSize() const 501 NNLSChroma::getPreferredStepSize() const
501 { 502 {
502 if (debug_on) cerr << "--> getPreferredStepSize" << endl; 503 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
503 return 2048; // 0 means "anything sensible"; in practice this 504 return 2048; // 0 means "anything sensible"; in practice this
504 // means the same as the block size for TimeDomain 505 // means the same as the block size for TimeDomain
505 // plugins, or half of it for FrequencyDomain plugins 506 // plugins, or half of it for FrequencyDomain plugins
506 } 507 }
507 508
508 size_t 509 size_t
509 NNLSChroma::getMinChannelCount() const 510 NNLSChroma::getMinChannelCount() const
510 { 511 {
511 if (debug_on) cerr << "--> getMinChannelCount" << endl; 512 if (debug_on) cerr << "--> getMinChannelCount" << endl;
512 return 1; 513 return 1;
513 } 514 }
514 515
515 size_t 516 size_t
516 NNLSChroma::getMaxChannelCount() const 517 NNLSChroma::getMaxChannelCount() const
517 { 518 {
518 if (debug_on) cerr << "--> getMaxChannelCount" << endl; 519 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
519 return 1; 520 return 1;
520 } 521 }
521 522
522 NNLSChroma::ParameterList 523 NNLSChroma::ParameterList
523 NNLSChroma::getParameterDescriptors() const 524 NNLSChroma::getParameterDescriptors() const
524 { 525 {
525 if (debug_on) cerr << "--> getParameterDescriptors" << endl; 526 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
526 ParameterList list; 527 ParameterList list;
527 528
528 ParameterDescriptor d3; 529 ParameterDescriptor d3;
529 d3.identifier = "preset"; 530 d3.identifier = "preset";
530 d3.name = "preset"; 531 d3.name = "preset";
531 d3.description = "Spectral paling: no paling - 0; whitening - 1."; 532 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
532 d3.unit = ""; 533 d3.unit = "";
533 d3.isQuantized = true; 534 d3.isQuantized = true;
534 d3.quantizeStep = 1; 535 d3.quantizeStep = 1;
535 d3.minValue = 0.0; 536 d3.minValue = 0.0;
536 d3.maxValue = 3.0; 537 d3.maxValue = 3.0;
537 d3.defaultValue = 0.0; 538 d3.defaultValue = 0.0;
538 d3.valueNames.push_back("polyphonic pop"); 539 d3.valueNames.push_back("polyphonic pop");
539 d3.valueNames.push_back("polyphonic pop (fast)"); 540 d3.valueNames.push_back("polyphonic pop (fast)");
540 d3.valueNames.push_back("solo keyboard"); 541 d3.valueNames.push_back("solo keyboard");
541 d3.valueNames.push_back("manual"); 542 d3.valueNames.push_back("manual");
542 list.push_back(d3); 543 list.push_back(d3);
543 544
544 ParameterDescriptor d5; 545 ParameterDescriptor d5;
545 d5.identifier = "rollon"; 546 d5.identifier = "rollon";
546 d5.name = "spectral roll-on"; 547 d5.name = "spectral roll-on";
547 d5.description = "The bins below the spectral roll-on quantile will be set to 0."; 548 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
548 d5.unit = ""; 549 d5.unit = "";
549 d5.minValue = 0; 550 d5.minValue = 0;
550 d5.maxValue = 1; 551 d5.maxValue = 1;
551 d5.defaultValue = 0; 552 d5.defaultValue = 0;
552 d5.isQuantized = false; 553 d5.isQuantized = false;
553 list.push_back(d5); 554 list.push_back(d5);
554 555
555 // ParameterDescriptor d0; 556 // ParameterDescriptor d0;
556 // d0.identifier = "notedict"; 557 // d0.identifier = "notedict";
557 // d0.name = "note dictionary"; 558 // d0.name = "note dictionary";
558 // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; 559 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
578 d1.valueNames.push_back("global tuning"); 579 d1.valueNames.push_back("global tuning");
579 d1.valueNames.push_back("local tuning"); 580 d1.valueNames.push_back("local tuning");
580 d1.quantizeStep = 1.0; 581 d1.quantizeStep = 1.0;
581 list.push_back(d1); 582 list.push_back(d1);
582 583
583 // ParameterDescriptor d2; 584 // ParameterDescriptor d2;
584 // d2.identifier = "paling"; 585 // d2.identifier = "paling";
585 // d2.name = "spectral paling"; 586 // d2.name = "spectral paling";
586 // d2.description = "Spectral paling: no paling - 0; whitening - 1."; 587 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
587 // d2.unit = ""; 588 // d2.unit = "";
588 // d2.isQuantized = true; 589 // d2.isQuantized = true;
589 // // d2.quantizeStep = 0.1; 590 // // d2.quantizeStep = 0.1;
590 // d2.minValue = 0.0; 591 // d2.minValue = 0.0;
591 // d2.maxValue = 1.0; 592 // d2.maxValue = 1.0;
592 // d2.defaultValue = 1.0; 593 // d2.defaultValue = 1.0;
593 // d2.isQuantized = false; 594 // d2.isQuantized = false;
594 // list.push_back(d2); 595 // list.push_back(d2);
595 ParameterDescriptor d4; 596 ParameterDescriptor d4;
596 d4.identifier = "chromanormalize"; 597 d4.identifier = "chromanormalize";
597 d4.name = "chroma normalization"; 598 d4.name = "chroma normalization";
598 d4.description = "How shall the chroma vector be normalized?"; 599 d4.description = "How shall the chroma vector be normalized?";
599 d4.unit = ""; 600 d4.unit = "";
600 d4.minValue = 0; 601 d4.minValue = 0;
601 d4.maxValue = 3; 602 d4.maxValue = 3;
602 d4.defaultValue = 0; 603 d4.defaultValue = 0;
603 d4.isQuantized = true; 604 d4.isQuantized = true;
604 d4.valueNames.push_back("none"); 605 d4.valueNames.push_back("none");
605 d4.valueNames.push_back("maximum norm"); 606 d4.valueNames.push_back("maximum norm");
606 d4.valueNames.push_back("L1 norm"); 607 d4.valueNames.push_back("L1 norm");
607 d4.valueNames.push_back("L2 norm"); 608 d4.valueNames.push_back("L2 norm");
608 d4.quantizeStep = 1.0; 609 d4.quantizeStep = 1.0;
609 list.push_back(d4); 610 list.push_back(d4);
610 611
611 return list; 612 return list;
612 } 613 }
613 614
614 float 615 float
615 NNLSChroma::getParameter(string identifier) const 616 NNLSChroma::getParameter(string identifier) const
616 { 617 {
617 if (debug_on) cerr << "--> getParameter" << endl; 618 if (debug_on) cerr << "--> getParameter" << endl;
618 if (identifier == "notedict") { 619 if (identifier == "notedict") {
619 return m_dictID; 620 return m_dictID;
620 } 621 }
621 622
622 if (identifier == "paling") { 623 if (identifier == "paling") {
623 return m_paling; 624 return m_paling;
624 } 625 }
625 626
626 if (identifier == "rollon") { 627 if (identifier == "rollon") {
627 return m_rollon; 628 return m_rollon;
628 } 629 }
629 630
630 if (identifier == "tuningmode") { 631 if (identifier == "tuningmode") {
631 if (m_tuneLocal) { 632 if (m_tuneLocal) {
632 return 1.0; 633 return 1.0;
633 } else { 634 } else {
634 return 0.0; 635 return 0.0;
635 } 636 }
636 } 637 }
637 if (identifier == "preset") { 638 if (identifier == "preset") {
638 return m_preset; 639 return m_preset;
639 } 640 }
640 if (identifier == "chromanormalize") { 641 if (identifier == "chromanormalize") {
641 return m_doNormalizeChroma; 642 return m_doNormalizeChroma;
642 } 643 }
643 return 0; 644 return 0;
644 645
645 } 646 }
646 647
647 void 648 void
648 NNLSChroma::setParameter(string identifier, float value) 649 NNLSChroma::setParameter(string identifier, float value)
649 { 650 {
650 if (debug_on) cerr << "--> setParameter" << endl; 651 if (debug_on) cerr << "--> setParameter" << endl;
651 if (identifier == "notedict") { 652 if (identifier == "notedict") {
652 m_dictID = (int) value; 653 m_dictID = (int) value;
653 } 654 }
654 655
655 if (identifier == "paling") { 656 if (identifier == "paling") {
660 m_tuneLocal = (value > 0) ? true : false; 661 m_tuneLocal = (value > 0) ? true : false;
661 // cerr << "m_tuneLocal :" << m_tuneLocal << endl; 662 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
662 } 663 }
663 if (identifier == "preset") { 664 if (identifier == "preset") {
664 m_preset = value; 665 m_preset = value;
665 if (m_preset == 0.0) { 666 if (m_preset == 0.0) {
666 m_tuneLocal = false; 667 m_tuneLocal = false;
667 m_paling = 1.0; 668 m_paling = 1.0;
668 m_dictID = 0.0; 669 m_dictID = 0.0;
669 } 670 }
670 if (m_preset == 1.0) { 671 if (m_preset == 1.0) {
671 m_tuneLocal = false; 672 m_tuneLocal = false;
672 m_paling = 1.0; 673 m_paling = 1.0;
673 m_dictID = 1.0; 674 m_dictID = 1.0;
674 } 675 }
675 if (m_preset == 2.0) { 676 if (m_preset == 2.0) {
676 m_tuneLocal = false; 677 m_tuneLocal = false;
677 m_paling = 0.7; 678 m_paling = 0.7;
678 m_dictID = 0.0; 679 m_dictID = 0.0;
679 } 680 }
680 } 681 }
681 if (identifier == "chromanormalize") { 682 if (identifier == "chromanormalize") {
682 m_doNormalizeChroma = value; 683 m_doNormalizeChroma = value;
683 } 684 }
684 685
685 if (identifier == "rollon") { 686 if (identifier == "rollon") {
686 m_rollon = value; 687 m_rollon = value;
687 } 688 }
688 } 689 }
689 690
690 NNLSChroma::ProgramList 691 NNLSChroma::ProgramList
691 NNLSChroma::getPrograms() const 692 NNLSChroma::getPrograms() const
692 { 693 {
693 if (debug_on) cerr << "--> getPrograms" << endl; 694 if (debug_on) cerr << "--> getPrograms" << endl;
694 ProgramList list; 695 ProgramList list;
695 696
696 // If you have no programs, return an empty list (or simply don't 697 // If you have no programs, return an empty list (or simply don't
697 // implement this function or getCurrentProgram/selectProgram) 698 // implement this function or getCurrentProgram/selectProgram)
698 699
700 } 701 }
701 702
702 string 703 string
703 NNLSChroma::getCurrentProgram() const 704 NNLSChroma::getCurrentProgram() const
704 { 705 {
705 if (debug_on) cerr << "--> getCurrentProgram" << endl; 706 if (debug_on) cerr << "--> getCurrentProgram" << endl;
706 return ""; // no programs 707 return ""; // no programs
707 } 708 }
708 709
709 void 710 void
710 NNLSChroma::selectProgram(string name) 711 NNLSChroma::selectProgram(string name)
711 { 712 {
712 if (debug_on) cerr << "--> selectProgram" << endl; 713 if (debug_on) cerr << "--> selectProgram" << endl;
713 } 714 }
714 715
715 716
716 NNLSChroma::OutputList 717 NNLSChroma::OutputList
717 NNLSChroma::getOutputDescriptors() const 718 NNLSChroma::getOutputDescriptors() const
718 { 719 {
719 if (debug_on) cerr << "--> getOutputDescriptors" << endl; 720 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
720 OutputList list; 721 OutputList list;
721 722
722 // Make chroma names for the binNames property 723 // Make chroma names for the binNames property
723 vector<string> chromanames; 724 vector<string> chromanames;
724 vector<string> bothchromanames; 725 vector<string> bothchromanames;
727 if (iNote < 12) { 728 if (iNote < 12) {
728 chromanames.push_back(notenames[iNote]); 729 chromanames.push_back(notenames[iNote]);
729 } 730 }
730 } 731 }
731 732
732 // int nNote = 84; 733 // int nNote = 84;
733 734
734 // See OutputDescriptor documentation for the possibilities here. 735 // See OutputDescriptor documentation for the possibilities here.
735 // Every plugin must have at least one output. 736 // Every plugin must have at least one output.
736 737
737 OutputDescriptor d0; 738 OutputDescriptor d0;
740 d0.description = "The concert pitch."; 741 d0.description = "The concert pitch.";
741 d0.unit = "Hz"; 742 d0.unit = "Hz";
742 d0.hasFixedBinCount = true; 743 d0.hasFixedBinCount = true;
743 d0.binCount = 0; 744 d0.binCount = 0;
744 d0.hasKnownExtents = true; 745 d0.hasKnownExtents = true;
745 d0.minValue = 427.47; 746 d0.minValue = 427.47;
746 d0.maxValue = 452.89; 747 d0.maxValue = 452.89;
747 d0.isQuantized = false; 748 d0.isQuantized = false;
748 d0.sampleType = OutputDescriptor::VariableSampleRate; 749 d0.sampleType = OutputDescriptor::VariableSampleRate;
749 d0.hasDuration = false; 750 d0.hasDuration = false;
750 list.push_back(d0); 751 list.push_back(d0);
751 752
752 OutputDescriptor d1; 753 OutputDescriptor d1;
753 d1.identifier = "logfreqspec"; 754 d1.identifier = "logfreqspec";
754 d1.name = "Log-Frequency Spectrum"; 755 d1.name = "Log-Frequency Spectrum";
755 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; 756 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
756 d1.unit = ""; 757 d1.unit = "";
757 d1.hasFixedBinCount = true; 758 d1.hasFixedBinCount = true;
761 d1.sampleType = OutputDescriptor::FixedSampleRate; 762 d1.sampleType = OutputDescriptor::FixedSampleRate;
762 d1.hasDuration = false; 763 d1.hasDuration = false;
763 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 764 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
764 list.push_back(d1); 765 list.push_back(d1);
765 766
766 OutputDescriptor d2; 767 OutputDescriptor d2;
767 d2.identifier = "tunedlogfreqspec"; 768 d2.identifier = "tunedlogfreqspec";
768 d2.name = "Tuned Log-Frequency Spectrum"; 769 d2.name = "Tuned Log-Frequency Spectrum";
769 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; 770 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
770 d2.unit = ""; 771 d2.unit = "";
771 d2.hasFixedBinCount = true; 772 d2.hasFixedBinCount = true;
848 d7.sampleType = OutputDescriptor::VariableSampleRate; 849 d7.sampleType = OutputDescriptor::VariableSampleRate;
849 d7.hasDuration = false; 850 d7.hasDuration = false;
850 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 851 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
851 list.push_back(d7); 852 list.push_back(d7);
852 853
853 // 854 //
854 // OutputDescriptor d9; 855 // OutputDescriptor d9;
855 // d9.identifier = "inconsistencysegment"; 856 // d9.identifier = "inconsistencysegment";
856 // d9.name = "Harmonic inconsistency segmenter"; 857 // d9.name = "Harmonic inconsistency segmenter";
857 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; 858 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
858 // d9.unit = ""; 859 // d9.unit = "";
859 // d9.hasFixedBinCount = true; 860 // d9.hasFixedBinCount = true;
860 // d9.binCount = 0; 861 // d9.binCount = 0;
861 // d9.hasKnownExtents = true; 862 // d9.hasKnownExtents = true;
862 // d9.minValue = 0.1; 863 // d9.minValue = 0.1;
863 // d9.maxValue = 0.9; 864 // d9.maxValue = 0.9;
864 // d9.isQuantized = false; 865 // d9.isQuantized = false;
865 // d9.sampleType = OutputDescriptor::VariableSampleRate; 866 // d9.sampleType = OutputDescriptor::VariableSampleRate;
866 // d9.hasDuration = false; 867 // d9.hasDuration = false;
867 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 868 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
868 // list.push_back(d9); 869 // list.push_back(d9);
869 // 870 //
870 OutputDescriptor d10; 871 OutputDescriptor d10;
871 d10.identifier = "localtuning"; 872 d10.identifier = "localtuning";
872 d10.name = "Local tuning"; 873 d10.name = "Local tuning";
873 d10.description = "Tuning based on the history up to this timestamp."; 874 d10.description = "Tuning based on the history up to this timestamp.";
874 d10.unit = "Hz"; 875 d10.unit = "Hz";
875 d10.hasFixedBinCount = true; 876 d10.hasFixedBinCount = true;
876 d10.binCount = 1; 877 d10.binCount = 1;
877 d10.hasKnownExtents = true; 878 d10.hasKnownExtents = true;
878 d10.minValue = 427.47; 879 d10.minValue = 427.47;
879 d10.maxValue = 452.89; 880 d10.maxValue = 452.89;
880 d10.isQuantized = false; 881 d10.isQuantized = false;
881 d10.sampleType = OutputDescriptor::FixedSampleRate; 882 d10.sampleType = OutputDescriptor::FixedSampleRate;
882 d10.hasDuration = false; 883 d10.hasDuration = false;
883 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 884 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
884 list.push_back(d10); 885 list.push_back(d10);
885 886
886 OutputDescriptor d8; 887 OutputDescriptor d8;
887 d8.identifier = "harmonicchange"; 888 d8.identifier = "harmonicchange";
888 d8.name = "Harmonic change value"; 889 d8.name = "Harmonic change value";
889 d8.description = "Harmonic change."; 890 d8.description = "Harmonic change.";
890 d8.unit = ""; 891 d8.unit = "";
891 d8.hasFixedBinCount = true; 892 d8.hasFixedBinCount = true;
892 d8.binCount = 1; 893 d8.binCount = 1;
893 d8.hasKnownExtents = true; 894 d8.hasKnownExtents = true;
894 d8.minValue = 0.0; 895 d8.minValue = 0.0;
895 d8.maxValue = 0.999; 896 d8.maxValue = 0.999;
896 d8.isQuantized = false; 897 d8.isQuantized = false;
897 d8.sampleType = OutputDescriptor::FixedSampleRate; 898 d8.sampleType = OutputDescriptor::FixedSampleRate;
898 d8.hasDuration = false; 899 d8.hasDuration = false;
899 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; 900 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
900 list.push_back(d8); 901 list.push_back(d8);
904 905
905 906
906 bool 907 bool
907 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) 908 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
908 { 909 {
909 if (debug_on) { 910 if (debug_on) {
910 cerr << "--> initialise"; 911 cerr << "--> initialise";
911 } 912 }
912 913
913 if (channels < getMinChannelCount() || 914 if (channels < getMinChannelCount() ||
914 channels > getMaxChannelCount()) return false; 915 channels > getMaxChannelCount()) return false;
915 m_blockSize = blockSize; 916 m_blockSize = blockSize;
916 m_stepSize = stepSize; 917 m_stepSize = stepSize;
917 frameCount = 0; 918 frameCount = 0;
918 int tempn = 256 * m_blockSize/2; 919 int tempn = 256 * m_blockSize/2;
919 // cerr << "length of tempkernel : " << tempn << endl; 920 // cerr << "length of tempkernel : " << tempn << endl;
920 float *tempkernel; 921 float *tempkernel;
921 922
922 tempkernel = new float[tempn]; 923 tempkernel = new float[tempn];
923 924
924 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); 925 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
925 m_kernelValue.clear(); 926 m_kernelValue.clear();
926 m_kernelFftIndex.clear(); 927 m_kernelFftIndex.clear();
927 m_kernelNoteIndex.clear(); 928 m_kernelNoteIndex.clear();
928 int countNonzero = 0; 929 int countNonzero = 0;
929 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix 930 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
930 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { 931 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
931 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { 932 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
932 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); 933 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
933 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { 934 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
934 countNonzero++; 935 countNonzero++;
935 } 936 }
936 m_kernelFftIndex.push_back(iFFT); 937 m_kernelFftIndex.push_back(iFFT);
937 m_kernelNoteIndex.push_back(iNote); 938 m_kernelNoteIndex.push_back(iNote);
938 } 939 }
939 } 940 }
940 } 941 }
941 // cerr << "nonzero count : " << countNonzero << endl; 942 // cerr << "nonzero count : " << countNonzero << endl;
942 delete [] tempkernel; 943 delete [] tempkernel;
943 ofstream myfile; 944 ofstream myfile;
944 myfile.open ("matrix.txt"); 945 myfile.open ("matrix.txt");
945 // myfile << "Writing this to a file.\n"; 946 // myfile << "Writing this to a file.\n";
946 for (int i = 0; i < nNote * 84; ++i) { 947 for (int i = 0; i < nNote * 84; ++i) {
947 myfile << m_dict[i] << endl; 948 myfile << m_dict[i] << endl;
948 } 949 }
949 myfile.close(); 950 myfile.close();
950 return true; 951 return true;
951 } 952 }
952 953
953 void 954 void
954 NNLSChroma::reset() 955 NNLSChroma::reset()
955 { 956 {
956 if (debug_on) cerr << "--> reset"; 957 if (debug_on) cerr << "--> reset";
957 958
958 // Clear buffers, reset stored values, etc 959 // Clear buffers, reset stored values, etc
959 frameCount = 0; 960 frameCount = 0;
960 m_dictID = 0; 961 m_dictID = 0;
961 m_fl.clear(); 962 m_fl.clear();
962 m_meanTuning0 = 0; 963 m_meanTuning0 = 0;
963 m_meanTuning1 = 0; 964 m_meanTuning1 = 0;
964 m_meanTuning2 = 0; 965 m_meanTuning2 = 0;
965 m_localTuning0 = 0; 966 m_localTuning0 = 0;
966 m_localTuning1 = 0; 967 m_localTuning1 = 0;
967 m_localTuning2 = 0; 968 m_localTuning2 = 0;
968 m_localTuning.clear(); 969 m_localTuning.clear();
969 } 970 }
970 971
971 NNLSChroma::FeatureSet 972 NNLSChroma::FeatureSet
972 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 973 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
973 { 974 {
974 if (debug_on) cerr << "--> process" << endl; 975 if (debug_on) cerr << "--> process" << endl;
975 frameCount++; 976 frameCount++;
976 float *magnitude = new float[m_blockSize/2]; 977 float *magnitude = new float[m_blockSize/2];
977 978
978 Feature f10; // local tuning 979 Feature f10; // local tuning
979 f10.hasTimestamp = true; 980 f10.hasTimestamp = true;
980 f10.timestamp = timestamp; 981 f10.timestamp = timestamp;
981 const float *fbuf = inputBuffers[0]; 982 const float *fbuf = inputBuffers[0];
982 float energysum = 0; 983 float energysum = 0;
983 // make magnitude 984 // make magnitude
984 float maxmag = -10000; 985 float maxmag = -10000;
985 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { 986 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
986 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + 987 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
987 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); 988 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
988 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; 989 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
989 if (m_rollon > 0) { 990 if (m_rollon > 0) {
990 energysum += pow(magnitude[iBin],2); 991 energysum += pow(magnitude[iBin],2);
991 } 992 }
992 } 993 }
993 994
994 float cumenergy = 0; 995 float cumenergy = 0;
995 if (m_rollon > 0) { 996 if (m_rollon > 0) {
996 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { 997 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
997 cumenergy += pow(magnitude[iBin],2); 998 cumenergy += pow(magnitude[iBin],2);
998 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0; 999 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
999 else break; 1000 else break;
1000 } 1001 }
1001 } 1002 }
1002 1003
1003 if (maxmag < 2) { 1004 if (maxmag < 2) {
1004 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; 1005 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
1005 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { 1006 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
1006 magnitude[iBin] = 0; 1007 magnitude[iBin] = 0;
1007 } 1008 }
1008 } 1009 }
1009 1010
1010 // note magnitude mapping using pre-calculated matrix 1011 // note magnitude mapping using pre-calculated matrix
1011 float *nm = new float[nNote]; // note magnitude 1012 float *nm = new float[nNote]; // note magnitude
1012 for (size_t iNote = 0; iNote < nNote; iNote++) { 1013 for (size_t iNote = 0; iNote < nNote; iNote++) {
1013 nm[iNote] = 0; // initialise as 0 1014 nm[iNote] = 0; // initialise as 0
1014 } 1015 }
1015 int binCount = 0; 1016 int binCount = 0;
1016 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { 1017 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
1017 // cerr << "."; 1018 // cerr << ".";
1018 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; 1019 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
1019 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; 1020 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
1020 binCount++; 1021 binCount++;
1021 } 1022 }
1022 // cerr << nm[20]; 1023 // cerr << nm[20];
1023 // cerr << endl; 1024 // cerr << endl;
1024 1025
1025 1026
1026 float one_over_N = 1.0/frameCount; 1027 float one_over_N = 1.0/frameCount;
1027 // update means of complex tuning variables 1028 // update means of complex tuning variables
1028 m_meanTuning0 *= float(frameCount-1)*one_over_N; 1029 m_meanTuning0 *= float(frameCount-1)*one_over_N;
1031 1032
1032 for (int iTone = 0; iTone < 160; iTone = iTone + 3) { 1033 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
1033 m_meanTuning0 += nm[iTone + 0]*one_over_N; 1034 m_meanTuning0 += nm[iTone + 0]*one_over_N;
1034 m_meanTuning1 += nm[iTone + 1]*one_over_N; 1035 m_meanTuning1 += nm[iTone + 1]*one_over_N;
1035 m_meanTuning2 += nm[iTone + 2]*one_over_N; 1036 m_meanTuning2 += nm[iTone + 2]*one_over_N;
1036 float ratioOld = 0.997; 1037 float ratioOld = 0.997;
1037 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); 1038 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
1038 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); 1039 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
1039 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); 1040 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
1040 } 1041 }
1041 1042
1042 // if (m_tuneLocal) { 1043 // if (m_tuneLocal) {
1043 // local tuning 1044 // local tuning
1044 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; 1045 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
1045 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; 1046 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
1046 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); 1047 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
1047 m_localTuning.push_back(normalisedtuning); 1048 m_localTuning.push_back(normalisedtuning);
1048 float tuning440 = 440 * pow(2,normalisedtuning/12); 1049 float tuning440 = 440 * pow(2,normalisedtuning/12);
1049 f10.values.push_back(tuning440); 1050 f10.values.push_back(tuning440);
1050 // cerr << tuning440 << endl; 1051 // cerr << tuning440 << endl;
1051 // } 1052 // }
1052 1053
1053 Feature f1; // logfreqspec 1054 Feature f1; // logfreqspec
1054 f1.hasTimestamp = true; 1055 f1.hasTimestamp = true;
1055 f1.timestamp = timestamp; 1056 f1.timestamp = timestamp;
1056 for (size_t iNote = 0; iNote < nNote; iNote++) { 1057 for (size_t iNote = 0; iNote < nNote; iNote++) {
1057 f1.values.push_back(nm[iNote]); 1058 f1.values.push_back(nm[iNote]);
1058 } 1059 }
1059 1060
1060 FeatureSet fs; 1061 FeatureSet fs;
1061 fs[1].push_back(f1); 1062 fs[1].push_back(f1);
1062 fs[8].push_back(f10); 1063 fs[8].push_back(f10);
1063 1064
1064 // deletes 1065 // deletes
1065 delete[] magnitude; 1066 delete[] magnitude;
1066 delete[] nm; 1067 delete[] nm;
1067 1068
1068 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures 1069 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
1069 char * pPath; 1070 char * pPath;
1070 pPath = getenv ("VAMP_PATH"); 1071 pPath = getenv ("VAMP_PATH");
1071 1072
1072 1073
1073 return fs; 1074 return fs;
1074 } 1075 }
1075 1076
1076 NNLSChroma::FeatureSet 1077 NNLSChroma::FeatureSet
1077 NNLSChroma::getRemainingFeatures() 1078 NNLSChroma::getRemainingFeatures()
1078 { 1079 {
1079 if (debug_on) cerr << "--> getRemainingFeatures" << endl; 1080 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
1080 FeatureSet fsOut; 1081 FeatureSet fsOut;
1081 if (m_fl.size() == 0) return fsOut; 1082 if (m_fl.size() == 0) return fsOut;
1082 int nChord = m_chordnames.size(); 1083 int nChord = m_chordnames.size();
1083 // 1084 //
1084 /** Calculate Tuning 1085 /** Calculate Tuning
1085 calculate tuning from (using the angle of the complex number defined by the 1086 calculate tuning from (using the angle of the complex number defined by the
1086 cumulative mean real and imag values) 1087 cumulative mean real and imag values)
1087 **/ 1088 **/
1088 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; 1089 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
1089 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; 1090 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
1090 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); 1091 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
1091 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); 1092 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
1092 int intShift = floor(normalisedtuning * 3); 1093 int intShift = floor(normalisedtuning * 3);
1093 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this 1094 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
1094 1095
1095 char buffer0 [50]; 1096 char buffer0 [50];
1096 1097
1097 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); 1098 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
1098 1099
1099 // cerr << "normalisedtuning: " << normalisedtuning << '\n'; 1100 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
1100 1101
1101 // push tuning to FeatureSet fsOut 1102 // push tuning to FeatureSet fsOut
1102 Feature f0; // tuning 1103 Feature f0; // tuning
1103 f0.hasTimestamp = true; 1104 f0.hasTimestamp = true;
1104 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; 1105 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
1105 f0.label = buffer0; 1106 f0.label = buffer0;
1106 fsOut[0].push_back(f0); 1107 fsOut[0].push_back(f0);
1107 1108
1108 /** Tune Log-Frequency Spectrogram 1109 /** Tune Log-Frequency Spectrogram
1109 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to 1110 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
1110 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). 1111 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
1111 **/ 1112 **/
1112 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; 1113 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
1113 1114
1114 float tempValue = 0; 1115 float tempValue = 0;
1115 float dbThreshold = 0; // relative to the background spectrum 1116 float dbThreshold = 0; // relative to the background spectrum
1116 float thresh = pow(10,dbThreshold/20); 1117 float thresh = pow(10,dbThreshold/20);
1117 // cerr << "tune local ? " << m_tuneLocal << endl; 1118 // cerr << "tune local ? " << m_tuneLocal << endl;
1118 int count = 0; 1119 int count = 0;
1119 1120
1120 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { 1121 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
1121 Feature f1 = *i; 1122 Feature f1 = *i;
1122 Feature f2; // tuned log-frequency spectrum 1123 Feature f2; // tuned log-frequency spectrum
1123 f2.hasTimestamp = true; 1124 f2.hasTimestamp = true;
1124 f2.timestamp = f1.timestamp; 1125 f2.timestamp = f1.timestamp;
1125 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero 1126 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
1126 1127
1127 if (m_tuneLocal) { 1128 if (m_tuneLocal) {
1128 intShift = floor(m_localTuning[count] * 3); 1129 intShift = floor(m_localTuning[count] * 3);
1129 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this 1130 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
1130 } 1131 }
1131 1132
1132 // cerr << intShift << " " << intFactor << endl; 1133 // cerr << intShift << " " << intFactor << endl;
1133 1134
1134 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins 1135 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
1135 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; 1136 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
1136 f2.values.push_back(tempValue); 1137 f2.values.push_back(tempValue);
1137 } 1138 }
1138 1139
1139 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge 1140 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
1140 vector<float> runningmean = SpecialConvolution(f2.values,hw); 1141 vector<float> runningmean = SpecialConvolution(f2.values,hw);
1141 vector<float> runningstd; 1142 vector<float> runningstd;
1142 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) 1143 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
1143 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); 1144 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
1144 } 1145 }
1145 runningstd = SpecialConvolution(runningstd,hw); // second step convolve 1146 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
1146 for (int i = 0; i < 256; i++) { 1147 for (int i = 0; i < 256; i++) {
1147 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std 1148 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
1148 if (runningstd[i] > 0) { 1149 if (runningstd[i] > 0) {
1149 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? 1150 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
1150 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; 1151 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
1151 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? 1152 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
1152 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; 1153 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
1153 } 1154 }
1154 if (f2.values[i] < 0) { 1155 if (f2.values[i] < 0) {
1155 cerr << "ERROR: negative value in logfreq spectrum" << endl; 1156 cerr << "ERROR: negative value in logfreq spectrum" << endl;
1156 } 1157 }
1157 } 1158 }
1158 fsOut[2].push_back(f2); 1159 fsOut[2].push_back(f2);
1159 count++; 1160 count++;
1160 } 1161 }
1161 cerr << "done." << endl; 1162 cerr << "done." << endl;
1162 1163
1163 /** Semitone spectrum and chromagrams 1164 /** Semitone spectrum and chromagrams
1164 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum 1165 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
1165 is inferred using a non-negative least squares algorithm. 1166 is inferred using a non-negative least squares algorithm.
1166 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means 1167 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
1167 bass and treble stacked onto each other). 1168 bass and treble stacked onto each other).
1168 **/ 1169 **/
1169 if (m_dictID == 1) { 1170 if (m_dictID == 1) {
1170 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; 1171 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
1171 } else { 1172 } else {
1172 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; 1173 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
1173 } 1174 }
1174 1175
1175 1176
1176 vector<vector<float> > chordogram; 1177 vector<vector<float> > chordogram;
1177 vector<vector<int> > scoreChordogram; 1178 vector<vector<int> > scoreChordogram;
1178 vector<float> chordchange = vector<float>(fsOut[2].size(),0); 1179 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
1179 vector<float> oldchroma = vector<float>(12,0); 1180 vector<float> oldchroma = vector<float>(12,0);
1180 vector<float> oldbasschroma = vector<float>(12,0); 1181 vector<float> oldbasschroma = vector<float>(12,0);
1181 count = 0; 1182 count = 0;
1182 1183
1183 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { 1184 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
1184 Feature f2 = *it; // logfreq spectrum 1185 Feature f2 = *it; // logfreq spectrum
1185 Feature f3; // semitone spectrum 1186 Feature f3; // semitone spectrum
1186 Feature f4; // treble chromagram 1187 Feature f4; // treble chromagram
1187 Feature f5; // bass chromagram 1188 Feature f5; // bass chromagram
1188 Feature f6; // treble and bass chromagram 1189 Feature f6; // treble and bass chromagram
1189 1190
1190 f3.hasTimestamp = true; 1191 f3.hasTimestamp = true;
1191 f3.timestamp = f2.timestamp; 1192 f3.timestamp = f2.timestamp;
1192 1193
1193 f4.hasTimestamp = true; 1194 f4.hasTimestamp = true;
1194 f4.timestamp = f2.timestamp; 1195 f4.timestamp = f2.timestamp;
1195 1196
1196 f5.hasTimestamp = true; 1197 f5.hasTimestamp = true;
1197 f5.timestamp = f2.timestamp; 1198 f5.timestamp = f2.timestamp;
1198 1199
1199 f6.hasTimestamp = true; 1200 f6.hasTimestamp = true;
1200 f6.timestamp = f2.timestamp; 1201 f6.timestamp = f2.timestamp;
1201 1202
1202 double b[256]; 1203 double b[256];
1203 1204
1204 bool some_b_greater_zero = false; 1205 bool some_b_greater_zero = false;
1205 float sumb = 0; 1206 float sumb = 0;
1206 for (int i = 0; i < 256; i++) { 1207 for (int i = 0; i < 256; i++) {
1207 // b[i] = m_dict[(256 * count + i) % (256 * 84)]; 1208 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
1208 b[i] = f2.values[i]; 1209 b[i] = f2.values[i];
1209 sumb += b[i]; 1210 sumb += b[i];
1210 if (b[i] > 0) { 1211 if (b[i] > 0) {
1211 some_b_greater_zero = true; 1212 some_b_greater_zero = true;
1212 } 1213 }
1213 } 1214 }
1214 1215
1215 // here's where the non-negative least squares algorithm calculates the note activation x 1216 // here's where the non-negative least squares algorithm calculates the note activation x
1216 1217
1217 vector<float> chroma = vector<float>(12, 0); 1218 vector<float> chroma = vector<float>(12, 0);
1218 vector<float> basschroma = vector<float>(12, 0); 1219 vector<float> basschroma = vector<float>(12, 0);
1219 float currval; 1220 float currval;
1220 unsigned iSemitone = 0; 1221 unsigned iSemitone = 0;
1221 1222
1222 if (some_b_greater_zero) { 1223 if (some_b_greater_zero) {
1223 if (m_dictID == 1) { 1224 if (m_dictID == 1) {
1224 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { 1225 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
1225 currval = 0; 1226 currval = 0;
1226 currval += b[iNote + 1 + -1] * 0.5; 1227 currval += b[iNote + 1 + -1] * 0.5;
1227 currval += b[iNote + 1 + 0] * 1.0; 1228 currval += b[iNote + 1 + 0] * 1.0;
1228 currval += b[iNote + 1 + 1] * 0.5; 1229 currval += b[iNote + 1 + 1] * 0.5;
1229 f3.values.push_back(currval); 1230 f3.values.push_back(currval);
1230 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; 1231 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
1231 basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; 1232 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
1232 iSemitone++; 1233 iSemitone++;
1233 } 1234 }
1234 1235
1235 } else { 1236 } else {
1236 double x[84+1000]; 1237 double x[84+1000];
1237 for (int i = 1; i < 1084; ++i) x[i] = 1.0; 1238 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
1238 vector<int> signifIndex; 1239 vector<int> signifIndex;
1239 int index=0; 1240 int index=0;
1240 sumb /= 84.0; 1241 sumb /= 84.0;
1241 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { 1242 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
1242 float currval = 0; 1243 float currval = 0;
1243 currval += b[iNote + 1 + -1]; 1244 currval += b[iNote + 1 + -1];
1244 currval += b[iNote + 1 + 0]; 1245 currval += b[iNote + 1 + 0];
1245 currval += b[iNote + 1 + 1]; 1246 currval += b[iNote + 1 + 1];
1246 if (currval > 0) signifIndex.push_back(index); 1247 if (currval > 0) signifIndex.push_back(index);
1247 f3.values.push_back(0); // fill the values, change later 1248 f3.values.push_back(0); // fill the values, change later
1248 index++; 1249 index++;
1249 } 1250 }
1250 double rnorm; 1251 double rnorm;
1251 double w[84+1000]; 1252 double w[84+1000];
1252 double zz[84+1000]; 1253 double zz[84+1000];
1253 int indx[84+1000]; 1254 int indx[84+1000];
1254 int mode; 1255 int mode;
1255 int dictsize = 256*signifIndex.size(); 1256 int dictsize = 256*signifIndex.size();
1256 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; 1257 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
1257 double *curr_dict = new double[dictsize]; 1258 double *curr_dict = new double[dictsize];
1258 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { 1259 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
1259 for (unsigned iBin = 0; iBin < 256; iBin++) { 1260 for (unsigned iBin = 0; iBin < 256; iBin++) {
1260 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; 1261 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
1261 } 1262 }
1262 } 1263 }
1263 int sz = signifIndex.size(); 1264 int sz = signifIndex.size();
1264 int nn = nNote; 1265 int nn = nNote;
1265 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode); 1266 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode);
1266 delete [] curr_dict; 1267 delete [] curr_dict;
1267 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { 1268 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
1268 f3.values[signifIndex[iNote]] = x[iNote]; 1269 f3.values[signifIndex[iNote]] = x[iNote];
1269 // cerr << mode << endl; 1270 // cerr << mode << endl;
1270 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; 1271 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
1271 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; 1272 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
1272 } 1273 }
1273 } 1274 }
1274 } 1275 }
1275 1276
1276 1277
1277 1278
1278 1279
1279 f4.values = chroma; 1280 f4.values = chroma;
1280 f5.values = basschroma; 1281 f5.values = basschroma;
1281 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas 1282 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
1282 f6.values = chroma; 1283 f6.values = chroma;
1283 1284
1284 if (m_doNormalizeChroma > 0) { 1285 if (m_doNormalizeChroma > 0) {
1285 vector<float> chromanorm = vector<float>(3,0); 1286 vector<float> chromanorm = vector<float>(3,0);
1286 switch (int(m_doNormalizeChroma)) { 1287 switch (int(m_doNormalizeChroma)) {
1287 case 0: // should never end up here 1288 case 0: // should never end up here
1288 break; 1289 break;
1289 case 1: 1290 case 1:
1290 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); 1291 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
1291 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); 1292 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
1292 chromanorm[2] = max(chromanorm[0], chromanorm[1]); 1293 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
1293 break; 1294 break;
1294 case 2: 1295 case 2:
1295 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { 1296 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
1296 chromanorm[0] += *it; 1297 chromanorm[0] += *it;
1297 } 1298 }
1298 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { 1299 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
1299 chromanorm[1] += *it; 1300 chromanorm[1] += *it;
1300 } 1301 }
1301 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { 1302 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
1302 chromanorm[2] += *it; 1303 chromanorm[2] += *it;
1303 } 1304 }
1304 break; 1305 break;
1305 case 3: 1306 case 3:
1306 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { 1307 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
1307 chromanorm[0] += pow(*it,2); 1308 chromanorm[0] += pow(*it,2);
1308 } 1309 }
1309 chromanorm[0] = sqrt(chromanorm[0]); 1310 chromanorm[0] = sqrt(chromanorm[0]);
1310 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { 1311 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
1311 chromanorm[1] += pow(*it,2); 1312 chromanorm[1] += pow(*it,2);
1312 } 1313 }
1313 chromanorm[1] = sqrt(chromanorm[1]); 1314 chromanorm[1] = sqrt(chromanorm[1]);
1314 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { 1315 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
1315 chromanorm[2] += pow(*it,2); 1316 chromanorm[2] += pow(*it,2);
1316 } 1317 }
1317 chromanorm[2] = sqrt(chromanorm[2]); 1318 chromanorm[2] = sqrt(chromanorm[2]);
1318 break; 1319 break;
1319 } 1320 }
1320 if (chromanorm[0] > 0) { 1321 if (chromanorm[0] > 0) {
1321 for (int i = 0; i < f4.values.size(); i++) { 1322 for (int i = 0; i < f4.values.size(); i++) {
1322 f4.values[i] /= chromanorm[0]; 1323 f4.values[i] /= chromanorm[0];
1323 } 1324 }
1324 } 1325 }
1325 if (chromanorm[1] > 0) { 1326 if (chromanorm[1] > 0) {
1326 for (int i = 0; i < f5.values.size(); i++) { 1327 for (int i = 0; i < f5.values.size(); i++) {
1327 f5.values[i] /= chromanorm[1]; 1328 f5.values[i] /= chromanorm[1];
1328 } 1329 }
1329 } 1330 }
1330 if (chromanorm[2] > 0) { 1331 if (chromanorm[2] > 0) {
1331 for (int i = 0; i < f6.values.size(); i++) { 1332 for (int i = 0; i < f6.values.size(); i++) {
1332 f6.values[i] /= chromanorm[2]; 1333 f6.values[i] /= chromanorm[2];
1333 } 1334 }
1334 } 1335 }
1335 1336
1336 } 1337 }
1337 1338
1338 // local chord estimation 1339 // local chord estimation
1339 vector<float> currentChordSalience; 1340 vector<float> currentChordSalience;
1340 float tempchordvalue = 0; 1341 float tempchordvalue = 0;
1341 float sumchordvalue = 0; 1342 float sumchordvalue = 0;
1342 1343
1343 for (int iChord = 0; iChord < nChord; iChord++) { 1344 for (int iChord = 0; iChord < nChord; iChord++) {
1344 tempchordvalue = 0; 1345 tempchordvalue = 0;
1345 for (int iBin = 0; iBin < 12; iBin++) { 1346 for (int iBin = 0; iBin < 12; iBin++) {
1346 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; 1347 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
1347 } 1348 }
1348 for (int iBin = 12; iBin < 24; iBin++) { 1349 for (int iBin = 12; iBin < 24; iBin++) {
1349 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; 1350 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
1350 } 1351 }
1351 sumchordvalue+=tempchordvalue; 1352 sumchordvalue+=tempchordvalue;
1352 currentChordSalience.push_back(tempchordvalue); 1353 currentChordSalience.push_back(tempchordvalue);
1353 } 1354 }
1354 if (sumchordvalue > 0) { 1355 if (sumchordvalue > 0) {
1355 for (int iChord = 0; iChord < nChord; iChord++) { 1356 for (int iChord = 0; iChord < nChord; iChord++) {
1356 currentChordSalience[iChord] /= sumchordvalue; 1357 currentChordSalience[iChord] /= sumchordvalue;
1357 } 1358 }
1358 } else { 1359 } else {
1359 currentChordSalience[nChord-1] = 1.0; 1360 currentChordSalience[nChord-1] = 1.0;
1360 } 1361 }
1361 chordogram.push_back(currentChordSalience); 1362 chordogram.push_back(currentChordSalience);
1362 1363
1363 fsOut[3].push_back(f3); 1364 fsOut[3].push_back(f3);
1364 fsOut[4].push_back(f4); 1365 fsOut[4].push_back(f4);
1365 fsOut[5].push_back(f5); 1366 fsOut[5].push_back(f5);
1366 fsOut[6].push_back(f6); 1367 fsOut[6].push_back(f6);
1367 count++; 1368 count++;
1368 } 1369 }
1369 cerr << "done." << endl; 1370 cerr << "done." << endl;
1370 1371
1371 1372
1372 /* Simple chord estimation 1373 /* Simple chord estimation
1373 I just take the local chord estimates ("currentChordSalience") and average them over time, then 1374 I just take the local chord estimates ("currentChordSalience") and average them over time, then
1374 take the maximum. Very simple, don't do this at home... 1375 take the maximum. Very simple, don't do this at home...
1375 */ 1376 */
1376 cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; 1377 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
1377 count = 0; 1378 count = 0;
1378 int halfwindowlength = m_inputSampleRate / m_stepSize; 1379 int halfwindowlength = m_inputSampleRate / m_stepSize;
1379 vector<int> chordSequence; 1380 vector<int> chordSequence;
1380 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram 1381 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
1381 vector<int> temp = vector<int>(nChord,0); 1382 vector<int> temp = vector<int>(nChord,0);
1382 scoreChordogram.push_back(temp); 1383 scoreChordogram.push_back(temp);
1383 } 1384 }
1384 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { 1385 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
1385 int startIndex = count + 1; 1386 int startIndex = count + 1;
1386 int endIndex = count + 2 * halfwindowlength; 1387 int endIndex = count + 2 * halfwindowlength;
1387 1388
1388 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); 1389 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
1389 1390
1390 vector<int> chordCandidates; 1391 vector<int> chordCandidates;
1391 for (unsigned iChord = 0; iChord < nChord-1; iChord++) { 1392 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
1392 // float currsum = 0; 1393 // float currsum = 0;
1393 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { 1394 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
1394 // currsum += chordogram[iFrame][iChord]; 1395 // currsum += chordogram[iFrame][iChord];
1395 // } 1396 // }
1396 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); 1397 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
1397 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { 1398 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
1398 if (chordogram[iFrame][iChord] > chordThreshold) { 1399 if (chordogram[iFrame][iChord] > chordThreshold) {
1399 chordCandidates.push_back(iChord); 1400 chordCandidates.push_back(iChord);
1400 break; 1401 break;
1401 } 1402 }
1402 } 1403 }
1403 } 1404 }
1404 chordCandidates.push_back(nChord-1); 1405 chordCandidates.push_back(nChord-1);
1405 // cerr << chordCandidates.size() << endl; 1406 // cerr << chordCandidates.size() << endl;
1406 1407
1407 float maxval = 0; // will be the value of the most salient *chord change* in this frame 1408 float maxval = 0; // will be the value of the most salient *chord change* in this frame
1408 float maxindex = 0; //... and the index thereof 1409 float maxindex = 0; //... and the index thereof
1409 unsigned bestchordL = nChord-1; // index of the best "left" chord 1410 unsigned bestchordL = nChord-1; // index of the best "left" chord
1410 unsigned bestchordR = nChord-1; // index of the best "right" chord 1411 unsigned bestchordR = nChord-1; // index of the best "right" chord
1411 1412
1412 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { 1413 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
1413 // now find the max values on both sides of iWF 1414 // now find the max values on both sides of iWF
1414 // left side: 1415 // left side:
1415 float maxL = 0; 1416 float maxL = 0;
1416 unsigned maxindL = nChord-1; 1417 unsigned maxindL = nChord-1;
1417 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { 1418 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
1418 unsigned iChord = chordCandidates[kChord]; 1419 unsigned iChord = chordCandidates[kChord];
1419 float currsum = 0; 1420 float currsum = 0;
1420 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { 1421 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
1421 currsum += chordogram[count+iFrame][iChord]; 1422 currsum += chordogram[count+iFrame][iChord];
1422 } 1423 }
1423 if (iChord == nChord-1) currsum *= 0.8; 1424 if (iChord == nChord-1) currsum *= 0.8;
1424 if (currsum > maxL) { 1425 if (currsum > maxL) {
1425 maxL = currsum; 1426 maxL = currsum;
1426 maxindL = iChord; 1427 maxindL = iChord;
1427 } 1428 }
1428 } 1429 }
1429 // right side: 1430 // right side:
1430 float maxR = 0; 1431 float maxR = 0;
1431 unsigned maxindR = nChord-1; 1432 unsigned maxindR = nChord-1;
1432 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { 1433 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
1433 unsigned iChord = chordCandidates[kChord]; 1434 unsigned iChord = chordCandidates[kChord];
1434 float currsum = 0; 1435 float currsum = 0;
1435 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { 1436 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
1436 currsum += chordogram[count+iFrame][iChord]; 1437 currsum += chordogram[count+iFrame][iChord];
1437 } 1438 }
1438 if (iChord == nChord-1) currsum *= 0.8; 1439 if (iChord == nChord-1) currsum *= 0.8;
1439 if (currsum > maxR) { 1440 if (currsum > maxR) {
1440 maxR = currsum; 1441 maxR = currsum;
1441 maxindR = iChord; 1442 maxindR = iChord;
1442 } 1443 }
1443 } 1444 }
1444 if (maxL+maxR > maxval) { 1445 if (maxL+maxR > maxval) {
1445 maxval = maxL+maxR; 1446 maxval = maxL+maxR;
1446 maxindex = iWF; 1447 maxindex = iWF;
1447 bestchordL = maxindL; 1448 bestchordL = maxindL;
1448 bestchordR = maxindR; 1449 bestchordR = maxindR;
1449 } 1450 }
1450 1451
1451 } 1452 }
1452 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; 1453 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
1453 // add a score to every chord-frame-point that was part of a maximum 1454 // add a score to every chord-frame-point that was part of a maximum
1454 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { 1455 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
1455 scoreChordogram[iFrame+count][bestchordL]++; 1456 scoreChordogram[iFrame+count][bestchordL]++;
1456 } 1457 }
1457 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { 1458 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
1458 scoreChordogram[iFrame+count][bestchordR]++; 1459 scoreChordogram[iFrame+count][bestchordR]++;
1459 } 1460 }
1460 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; 1461 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
1461 count++; 1462 count++;
1462 } 1463 }
1463 // cerr << "******* agent finished *******" << endl; 1464 // cerr << "******* agent finished *******" << endl;
1464 count = 0; 1465 count = 0;
1465 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { 1466 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
1466 float maxval = 0; // will be the value of the most salient chord in this frame 1467 float maxval = 0; // will be the value of the most salient chord in this frame
1467 float maxindex = 0; //... and the index thereof 1468 float maxindex = 0; //... and the index thereof
1468 for (unsigned iChord = 0; iChord < nChord; iChord++) { 1469 for (unsigned iChord = 0; iChord < nChord; iChord++) {
1469 if (scoreChordogram[count][iChord] > maxval) { 1470 if (scoreChordogram[count][iChord] > maxval) {
1470 maxval = scoreChordogram[count][iChord]; 1471 maxval = scoreChordogram[count][iChord];
1471 maxindex = iChord; 1472 maxindex = iChord;
1472 // cerr << iChord << endl; 1473 // cerr << iChord << endl;
1473 } 1474 }
1474 } 1475 }
1475 chordSequence.push_back(maxindex); 1476 chordSequence.push_back(maxindex);
1476 // cerr << "before modefilter, maxindex: " << maxindex << endl; 1477 // cerr << "before modefilter, maxindex: " << maxindex << endl;
1477 count++; 1478 count++;
1478 } 1479 }
1479 // cerr << "******* mode filter done *******" << endl; 1480 // cerr << "******* mode filter done *******" << endl;
1480 1481
1481 1482
1482 // mode filter on chordSequence 1483 // mode filter on chordSequence
1483 count = 0; 1484 count = 0;
1484 string oldChord = ""; 1485 string oldChord = "";
1485 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { 1486 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
1486 Feature f6 = *it; 1487 Feature f6 = *it;
1487 Feature f7; // chord estimate 1488 Feature f7; // chord estimate
1488 f7.hasTimestamp = true; 1489 f7.hasTimestamp = true;
1489 f7.timestamp = f6.timestamp; 1490 f7.timestamp = f6.timestamp;
1490 Feature f8; // chord estimate 1491 Feature f8; // chord estimate
1491 f8.hasTimestamp = true; 1492 f8.hasTimestamp = true;
1492 f8.timestamp = f6.timestamp; 1493 f8.timestamp = f6.timestamp;
1493 1494
1494 vector<int> chordCount = vector<int>(nChord,0); 1495 vector<int> chordCount = vector<int>(nChord,0);
1495 int maxChordCount = 0; 1496 int maxChordCount = 0;
1496 int maxChordIndex = nChord-1; 1497 int maxChordIndex = nChord-1;
1497 string maxChord; 1498 string maxChord;
1498 int startIndex = max(count - halfwindowlength/2,0); 1499 int startIndex = max(count - halfwindowlength/2,0);
1499 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); 1500 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
1500 for (int i = startIndex; i < endIndex; i++) { 1501 for (int i = startIndex; i < endIndex; i++) {
1501 chordCount[chordSequence[i]]++; 1502 chordCount[chordSequence[i]]++;
1502 if (chordCount[chordSequence[i]] > maxChordCount) { 1503 if (chordCount[chordSequence[i]] > maxChordCount) {
1503 // cerr << "start index " << startIndex << endl; 1504 // cerr << "start index " << startIndex << endl;
1504 maxChordCount++; 1505 maxChordCount++;
1505 maxChordIndex = chordSequence[i]; 1506 maxChordIndex = chordSequence[i];
1506 maxChord = m_chordnames[maxChordIndex]; 1507 maxChord = m_chordnames[maxChordIndex];
1507 } 1508 }
1508 } 1509 }
1509 // chordSequence[count] = maxChordIndex; 1510 // chordSequence[count] = maxChordIndex;
1510 // cerr << maxChordIndex << endl; 1511 // cerr << maxChordIndex << endl;
1511 f8.values.push_back(chordchange[count]/(halfwindowlength*2)); 1512 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
1512 // cerr << chordchange[count] << endl; 1513 // cerr << chordchange[count] << endl;
1513 fsOut[9].push_back(f8); 1514 fsOut[9].push_back(f8);
1514 if (oldChord != maxChord) { 1515 if (oldChord != maxChord) {
1515 oldChord = maxChord; 1516 oldChord = maxChord;
1516 1517
1517 // char buffer1 [50]; 1518 // char buffer1 [50];
1518 // if (maxChordIndex < nChord - 1) { 1519 // if (maxChordIndex < nChord - 1) {
1519 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); 1520 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
1520 // } else { 1521 // } else {
1521 // sprintf(buffer1, "N"); 1522 // sprintf(buffer1, "N");
1522 // } 1523 // }
1523 // f7.label = buffer1; 1524 // f7.label = buffer1;
1524 f7.label = m_chordnames[maxChordIndex]; 1525 f7.label = m_chordnames[maxChordIndex];
1525 fsOut[7].push_back(f7); 1526 fsOut[7].push_back(f7);
1526 } 1527 }
1527 count++; 1528 count++;
1528 } 1529 }
1529 Feature f7; // last chord estimate 1530 Feature f7; // last chord estimate
1530 f7.hasTimestamp = true; 1531 f7.hasTimestamp = true;
1531 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; 1532 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
1532 f7.label = "N"; 1533 f7.label = "N";
1533 fsOut[7].push_back(f7); 1534 fsOut[7].push_back(f7);
1534 cerr << "done." << endl; 1535 cerr << "done." << endl;
1535 // // musicity 1536 // // musicity
1536 // count = 0; 1537 // count = 0;
1537 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 1538 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
1538 // vector<float> musicityValue; 1539 // vector<float> musicityValue;
1539 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { 1540 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
1540 // Feature f4 = *it; 1541 // Feature f4 = *it;
1541 // 1542 //
1542 // int startIndex = max(count - musicitykernelwidth/2,0); 1543 // int startIndex = max(count - musicitykernelwidth/2,0);
1543 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); 1544 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
1544 // float chromasum = 0; 1545 // float chromasum = 0;
1545 // float diffsum = 0; 1546 // float diffsum = 0;
1546 // for (int k = 0; k < 12; k++) { 1547 // for (int k = 0; k < 12; k++) {
1547 // for (int i = startIndex + 1; i < endIndex; i++) { 1548 // for (int i = startIndex + 1; i < endIndex; i++) {
1548 // chromasum += pow(fsOut[4][i].values[k],2); 1549 // chromasum += pow(fsOut[4][i].values[k],2);
1549 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); 1550 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
1550 // } 1551 // }
1551 // } 1552 // }
1552 // diffsum /= chromasum; 1553 // diffsum /= chromasum;
1553 // musicityValue.push_back(diffsum); 1554 // musicityValue.push_back(diffsum);
1554 // count++; 1555 // count++;
1555 // } 1556 // }
1556 // 1557 //
1557 // float musicityThreshold = 0.44; 1558 // float musicityThreshold = 0.44;
1558 // if (m_stepSize == 4096) { 1559 // if (m_stepSize == 4096) {
1559 // musicityThreshold = 0.74; 1560 // musicityThreshold = 0.74;
1560 // } 1561 // }
1561 // if (m_stepSize == 4410) { 1562 // if (m_stepSize == 4410) {
1562 // musicityThreshold = 0.77; 1563 // musicityThreshold = 0.77;
1563 // } 1564 // }
1564 // 1565 //
1565 // count = 0; 1566 // count = 0;
1566 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { 1567 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
1567 // Feature f4 = *it; 1568 // Feature f4 = *it;
1568 // Feature f8; // musicity 1569 // Feature f8; // musicity
1569 // Feature f9; // musicity segmenter 1570 // Feature f9; // musicity segmenter
1570 // 1571 //
1571 // f8.hasTimestamp = true; 1572 // f8.hasTimestamp = true;
1572 // f8.timestamp = f4.timestamp; 1573 // f8.timestamp = f4.timestamp;
1573 // f9.hasTimestamp = true; 1574 // f9.hasTimestamp = true;
1574 // f9.timestamp = f4.timestamp; 1575 // f9.timestamp = f4.timestamp;
1575 // 1576 //
1576 // int startIndex = max(count - musicitykernelwidth/2,0); 1577 // int startIndex = max(count - musicitykernelwidth/2,0);
1577 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); 1578 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
1578 // int musicityCount = 0; 1579 // int musicityCount = 0;
1579 // for (int i = startIndex; i <= endIndex; i++) { 1580 // for (int i = startIndex; i <= endIndex; i++) {
1580 // if (musicityValue[i] > musicityThreshold) musicityCount++; 1581 // if (musicityValue[i] > musicityThreshold) musicityCount++;
1581 // } 1582 // }
1582 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); 1583 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
1583 // 1584 //
1584 // if (isSpeech) { 1585 // if (isSpeech) {
1585 // if (oldlabeltype != 2) { 1586 // if (oldlabeltype != 2) {
1586 // f9.label = "Speech"; 1587 // f9.label = "Speech";
1587 // fsOut[9].push_back(f9); 1588 // fsOut[9].push_back(f9);
1588 // oldlabeltype = 2; 1589 // oldlabeltype = 2;
1589 // } 1590 // }
1590 // } else { 1591 // } else {
1591 // if (oldlabeltype != 1) { 1592 // if (oldlabeltype != 1) {
1592 // f9.label = "Music"; 1593 // f9.label = "Music";
1593 // fsOut[9].push_back(f9); 1594 // fsOut[9].push_back(f9);
1594 // oldlabeltype = 1; 1595 // oldlabeltype = 1;
1595 // } 1596 // }
1596 // } 1597 // }
1597 // f8.values.push_back(musicityValue[count]); 1598 // f8.values.push_back(musicityValue[count]);
1598 // fsOut[8].push_back(f8); 1599 // fsOut[8].push_back(f8);
1599 // count++; 1600 // count++;
1600 // } 1601 // }
1601 return fsOut; 1602 return fsOut;
1602 1603
1603 } 1604 }
1604 1605