annotate NNLSChroma.cpp @ 1:2a491d71057d matthiasm-plugin

dictionary matrix, included nnls, next step will be nnls computation
author matthiasm
date Wed, 19 May 2010 11:38:48 +0000
parents 8aa2e8b3a778
children 8360483a026e
rev   line source
matthiasm@0 1
matthiasm@0 2 #include "NNLSChroma.h"
matthiasm@0 3 #include <cmath>
matthiasm@0 4 #include <list>
matthiasm@0 5 #include <iostream>
matthiasm@0 6 #include <sstream>
matthiasm@0 7 #include <cassert>
matthiasm@0 8 #include <cstdio>
matthiasm@1 9 #include "nnls.h"
matthiasm@0 10 // #include "cblas.h"
matthiasm@0 11 #include "chorddict.cpp"
matthiasm@0 12 using namespace std;
matthiasm@0 13
// Sine and cosine of 120 degrees (2*pi/3).
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// 19-tap symmetric smoothing window; the taps sum to approximately one.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// Weighting window that peaks at index 18 and is zero from index 37 onwards.
// NOTE(review): presumably one weight per semitone bin, used for the bass
// chromagram -- the code that consumes it is outside this excerpt.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023,
    0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091,
    0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};

// Symmetric 84-entry weighting window with its maximum at indices 41 and 42.
// NOTE(review): presumably the treble counterpart of basswindow -- confirm
// against the code that consumes it.
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014,
    0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018,
    0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283,
    0.996856, 0.999650,
    0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986,
    0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982,
    0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
    0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717,
    0.003144, 0.000350
};

// Bin labels: the twelve pitch classes starting at A, first tagged "(bass)",
// then untagged.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// hammingwind copied into a vector so it can be handed to SpecialConvolution.
const std::vector<float> hw(hammingwind, hammingwind + 19);

// Number of bins on the log-frequency axis built by logFreqMatrix().
const int nNote = 256;
matthiasm@0 23
// Compile-time switch for the "--> function" trace messages written to cerr.
const bool debug_on = false;

/** Special Convolution
 *
 * Convolves `convolvee` with `kernel` and returns a vector that is exactly as
 * long as the convolvee. The valid core of the result holds the usual
 * convolution values; the kernel.size()/2 bins at the beginning (end) are
 * padded with the value of the first (last) valid convolution bin.
 *
 * The result is now sized from the input instead of being fixed at 256
 * entries, so inputs longer than 256 no longer write out of bounds and
 * shorter inputs no longer come back padded to 256.
 *
 * @param convolvee signal to be smoothed
 * @param kernel    convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() values; all zeros when the convolvee is
 *         shorter than the kernel (there is no valid bin to pad from)
 **/
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    assert(lenKernel % 2 != 0); // no exception handling !!!

    std::vector<float> Z(lenConvolvee, 0.0f);

    // Without at least one fully overlapping position there is nothing to
    // compute and nothing to copy into the pads.
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    const int half = lenKernel / 2;

    // valid core of the convolution, stored centred on the kernel
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
matthiasm@0 59
matthiasm@0 60 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0 61 // {
matthiasm@0 62 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0 63 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0 64 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0 65 // }
matthiasm@0 66 // return freq;
matthiasm@0 67 // }
matthiasm@0 68
/**
 * Raised-cosine pulse of the given total width, centred on `centre`.
 *
 * @param x      position at which the pulse is evaluated
 * @param centre position of the pulse maximum (value 1)
 * @param width  total width of the pulse; the value falls to 0 at
 *               centre +/- width/2
 * @return pulse height in [0, 1]; 0 outside the pulse and for any
 *         non-positive width (previously width == 0 with x == centre
 *         produced NaN)
 */
float cospuls(float x, float centre, float width)
{
    // Same value as M_PI, spelled out because M_PI is not standard C++.
    const double pi = 3.14159265358979323846;

    // A pulse without extent has no height; this also avoids the 1/0 below.
    if (!(width > 0.0f)) {
        return 0.0;
    }

    const float recipwidth = 1.0 / width;
    // std::fabs so the floating-point overload is used unconditionally
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x - centre) * 2 * pi * recipwidth) * .5 + .5;
    }
    return 0.0;
}
matthiasm@0 77
matthiasm@0 78 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0 79 {
matthiasm@0 80 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0 81 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0 82 // now scale to correct for note density
matthiasm@0 83 float c = log(2.0)/binsperoctave;
matthiasm@0 84 if (x > 0) {
matthiasm@0 85 out = out / (c * x);
matthiasm@0 86 } else {
matthiasm@0 87 out = 0;
matthiasm@0 88 }
matthiasm@0 89 return out;
matthiasm@0 90 }
matthiasm@0 91
matthiasm@0 92 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0 93
matthiasm@0 94 int binspersemitone = 3; // this must be 3
matthiasm@0 95 int minoctave = 0; // this must be 0
matthiasm@0 96 int maxoctave = 7; // this must be 7
matthiasm@1 97 int oversampling = 80;
matthiasm@0 98
matthiasm@0 99 // linear frequency vector
matthiasm@0 100 vector<float> fft_f;
matthiasm@0 101 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0 102 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0 103 }
matthiasm@0 104 float fft_width = fs * 2.0 / blocksize;
matthiasm@0 105
matthiasm@0 106 // linear oversampled frequency vector
matthiasm@0 107 vector<float> oversampled_f;
matthiasm@0 108 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0 109 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0 110 }
matthiasm@0 111
matthiasm@0 112 // pitch-spaced frequency vector
matthiasm@0 113 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0 114 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0 115 vector<float> cq_f;
matthiasm@0 116 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0 117 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0 118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0 119 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0 120 for (int k = -1; k < 2; ++k) {
matthiasm@0 121 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0 122 }
matthiasm@0 123 }
matthiasm@0 124 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0 125 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0 126
matthiasm@0 127 int nFFT = fft_f.size();
matthiasm@0 128
matthiasm@0 129 vector<float> fft_activation;
matthiasm@0 130 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0 131 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0 132 fft_activation.push_back(cosp);
matthiasm@0 133 // cerr << cosp << endl;
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 float cq_activation;
matthiasm@0 137 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0 138 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0 139 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0 140 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0 141 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0 142 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0 143 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1 144 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0 145 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0 146 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0 147 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0 148 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0 149 }
matthiasm@0 150 // if (iCQ == 1 || iCQ == 2) {
matthiasm@0 151 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0 152 // }
matthiasm@0 153 }
matthiasm@0 154 }
matthiasm@0 155 }
matthiasm@0 156 return true;
matthiasm@0 157 }
matthiasm@0 158
matthiasm@1 159 bool dictionaryMatrix(double* dm) {
matthiasm@1 160 int binspersemitone = 3; // this must be 3
matthiasm@1 161 int minoctave = 0; // this must be 0
matthiasm@1 162 int maxoctave = 7; // this must be 7
matthiasm@1 163 float s_param = 0.6;
matthiasm@1 164
matthiasm@1 165 // pitch-spaced frequency vector
matthiasm@1 166 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1 167 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1 168 vector<float> cq_f;
matthiasm@1 169 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1 170 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1 171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1 172 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1 173 for (int k = -1; k < 2; ++k) {
matthiasm@1 174 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1 175 }
matthiasm@1 176 }
matthiasm@1 177 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1 178 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1 179
matthiasm@1 180 // make out frequency vector
matthiasm@1 181 vector<float> out_f;
matthiasm@1 182
matthiasm@1 183 float curr_f;
matthiasm@1 184 float floatbin;
matthiasm@1 185 float curr_amp;
matthiasm@1 186 // now for every combination calculate the matrix element
matthiasm@1 187 unsigned countElement = 0;
matthiasm@1 188 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@1 189 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1 190 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
matthiasm@1 191 if (curr_f > cq_f[nNote-1]) break;
matthiasm@1 192 floatbin = (iOut * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@1 193 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@1 194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@1 195 // cerr << dm[countElement] << endl;
matthiasm@1 196 dm[countElement] = cospuls(iNote+1.0, floatbin, binspersemitone + 0.0);
matthiasm@1 197 countElement++;
matthiasm@1 198 }
matthiasm@1 199 }
matthiasm@1 200 }
matthiasm@1 201 }
matthiasm@1 202
matthiasm@0 203
matthiasm@0 204 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0 205 Plugin(inputSampleRate),
matthiasm@0 206 m_fl(0),
matthiasm@0 207 m_blockSize(0),
matthiasm@0 208 m_stepSize(0),
matthiasm@0 209 m_lengthOfNoteIndex(0),
matthiasm@0 210 m_meanTuning0(0),
matthiasm@0 211 m_meanTuning1(0),
matthiasm@0 212 m_meanTuning2(0),
matthiasm@0 213 m_localTuning0(0),
matthiasm@0 214 m_localTuning1(0),
matthiasm@0 215 m_localTuning2(0),
matthiasm@0 216 m_paling(0),
matthiasm@0 217 m_localTuning(0),
matthiasm@0 218 m_kernelValue(0),
matthiasm@0 219 m_kernelFftIndex(0),
matthiasm@0 220 m_kernelNoteIndex(0),
matthiasm@1 221 m_dict(0),
matthiasm@0 222 m_tuneLocal(false),
matthiasm@0 223 m_dictID(0)
matthiasm@0 224 {
matthiasm@0 225 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@1 226 m_dict = new double[nNote * 84];
matthiasm@1 227 dictionaryMatrix(m_dict);
matthiasm@0 228 }
matthiasm@0 229
matthiasm@0 230
matthiasm@0 231 NNLSChroma::~NNLSChroma()
matthiasm@0 232 {
matthiasm@0 233 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1 234 delete [] m_dict;
matthiasm@0 235 }
matthiasm@0 236
matthiasm@0 237 string
matthiasm@0 238 NNLSChroma::getIdentifier() const
matthiasm@0 239 {
matthiasm@0 240 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 241 return "nnls_chroma";
matthiasm@0 242 }
matthiasm@0 243
matthiasm@0 244 string
matthiasm@0 245 NNLSChroma::getName() const
matthiasm@0 246 {
matthiasm@0 247 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 248 return "NNLS Chroma";
matthiasm@0 249 }
matthiasm@0 250
matthiasm@0 251 string
matthiasm@0 252 NNLSChroma::getDescription() const
matthiasm@0 253 {
matthiasm@0 254 // Return something helpful here!
matthiasm@0 255 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@0 256 return "";
matthiasm@0 257 }
matthiasm@0 258
matthiasm@0 259 string
matthiasm@0 260 NNLSChroma::getMaker() const
matthiasm@0 261 {
matthiasm@0 262 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 263 // Your name here
matthiasm@0 264 return "Matthias Mauch";
matthiasm@0 265 }
matthiasm@0 266
matthiasm@0 267 int
matthiasm@0 268 NNLSChroma::getPluginVersion() const
matthiasm@0 269 {
matthiasm@0 270 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 271 // Increment this each time you release a version that behaves
matthiasm@0 272 // differently from the previous one
matthiasm@0 273 return 1;
matthiasm@0 274 }
matthiasm@0 275
matthiasm@0 276 string
matthiasm@0 277 NNLSChroma::getCopyright() const
matthiasm@0 278 {
matthiasm@0 279 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 280 // This function is not ideally named. It does not necessarily
matthiasm@0 281 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 282 // should indicate the terms under which it is distributed. For
matthiasm@0 283 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 284 return "Copyright (2010). All rights reserved.";
matthiasm@0 285 }
matthiasm@0 286
matthiasm@0 287 NNLSChroma::InputDomain
matthiasm@0 288 NNLSChroma::getInputDomain() const
matthiasm@0 289 {
matthiasm@0 290 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 291 return FrequencyDomain;
matthiasm@0 292 }
matthiasm@0 293
matthiasm@0 294 size_t
matthiasm@0 295 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 296 {
matthiasm@0 297 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 298 return 16384; // 0 means "I can handle any block size"
matthiasm@0 299 }
matthiasm@0 300
matthiasm@0 301 size_t
matthiasm@0 302 NNLSChroma::getPreferredStepSize() const
matthiasm@0 303 {
matthiasm@0 304 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 305 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0 306 // means the same as the block size for TimeDomain
matthiasm@0 307 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 308 }
matthiasm@0 309
matthiasm@0 310 size_t
matthiasm@0 311 NNLSChroma::getMinChannelCount() const
matthiasm@0 312 {
matthiasm@0 313 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 314 return 1;
matthiasm@0 315 }
matthiasm@0 316
matthiasm@0 317 size_t
matthiasm@0 318 NNLSChroma::getMaxChannelCount() const
matthiasm@0 319 {
matthiasm@0 320 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 321 return 1;
matthiasm@0 322 }
matthiasm@0 323
matthiasm@0 324 NNLSChroma::ParameterList
matthiasm@0 325 NNLSChroma::getParameterDescriptors() const
matthiasm@0 326 {
matthiasm@0 327 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 328 ParameterList list;
matthiasm@0 329
matthiasm@0 330 ParameterDescriptor d0;
matthiasm@0 331 d0.identifier = "notedict";
matthiasm@0 332 d0.name = "note dictionary";
matthiasm@0 333 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@0 334 d0.unit = "";
matthiasm@0 335 d0.minValue = 0;
matthiasm@1 336 d0.maxValue = 1;
matthiasm@0 337 d0.defaultValue = 0;
matthiasm@0 338 d0.isQuantized = true;
matthiasm@0 339 d0.valueNames.push_back("s = 0.6");
matthiasm@1 340 // d0.valueNames.push_back("s = 0.9");
matthiasm@1 341 // d0.valueNames.push_back("s linearly spaced");
matthiasm@0 342 d0.valueNames.push_back("no NNLS");
matthiasm@0 343 d0.quantizeStep = 1.0;
matthiasm@0 344 list.push_back(d0);
matthiasm@0 345
matthiasm@0 346 ParameterDescriptor d1;
matthiasm@0 347 d1.identifier = "tuningmode";
matthiasm@0 348 d1.name = "tuning mode";
matthiasm@0 349 d1.description = "Tuning can be performed locally or on the whole extraction area.";
matthiasm@0 350 d1.unit = "";
matthiasm@0 351 d1.minValue = 0;
matthiasm@0 352 d1.maxValue = 1;
matthiasm@0 353 d1.defaultValue = 1;
matthiasm@0 354 d1.isQuantized = true;
matthiasm@0 355 d1.valueNames.push_back("global tuning");
matthiasm@0 356 d1.valueNames.push_back("local tuning");
matthiasm@0 357 d1.quantizeStep = 1.0;
matthiasm@0 358 list.push_back(d1);
matthiasm@0 359
matthiasm@0 360 ParameterDescriptor d2;
matthiasm@0 361 d2.identifier = "paling";
matthiasm@0 362 d2.name = "spectral paling";
matthiasm@0 363 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@0 364 d2.unit = "";
matthiasm@0 365 d2.minValue = 0;
matthiasm@0 366 d2.maxValue = 1;
matthiasm@0 367 d2.defaultValue = 0.5;
matthiasm@0 368 d2.isQuantized = false;
matthiasm@0 369 // d1.valueNames.push_back("global tuning");
matthiasm@0 370 // d1.valueNames.push_back("local tuning");
matthiasm@0 371 // d1.quantizeStep = 0.1;
matthiasm@0 372 list.push_back(d2);
matthiasm@0 373
matthiasm@0 374 return list;
matthiasm@0 375 }
matthiasm@0 376
matthiasm@0 377 float
matthiasm@0 378 NNLSChroma::getParameter(string identifier) const
matthiasm@0 379 {
matthiasm@0 380 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 381 if (identifier == "notedict") {
matthiasm@0 382 return m_dictID;
matthiasm@0 383 }
matthiasm@0 384
matthiasm@0 385 if (identifier == "paling") {
matthiasm@0 386 return m_paling;
matthiasm@0 387 }
matthiasm@0 388
matthiasm@0 389 if (identifier == "tuningmode") {
matthiasm@0 390 if (m_tuneLocal) {
matthiasm@0 391 return 1.0;
matthiasm@0 392 } else {
matthiasm@0 393 return 0.0;
matthiasm@0 394 }
matthiasm@0 395 }
matthiasm@0 396
matthiasm@0 397 return 0;
matthiasm@0 398
matthiasm@0 399 }
matthiasm@0 400
matthiasm@0 401 void
matthiasm@0 402 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 403 {
matthiasm@0 404 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 405 if (identifier == "notedict") {
matthiasm@0 406 m_dictID = (int) value;
matthiasm@0 407 }
matthiasm@0 408
matthiasm@0 409 if (identifier == "paling") {
matthiasm@0 410 m_paling = value;
matthiasm@0 411 }
matthiasm@0 412
matthiasm@0 413 if (identifier == "tuningmode") {
matthiasm@0 414 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 415 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 416 }
matthiasm@0 417 }
matthiasm@0 418
matthiasm@0 419 NNLSChroma::ProgramList
matthiasm@0 420 NNLSChroma::getPrograms() const
matthiasm@0 421 {
matthiasm@0 422 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 423 ProgramList list;
matthiasm@0 424
matthiasm@0 425 // If you have no programs, return an empty list (or simply don't
matthiasm@0 426 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 427
matthiasm@0 428 return list;
matthiasm@0 429 }
matthiasm@0 430
matthiasm@0 431 string
matthiasm@0 432 NNLSChroma::getCurrentProgram() const
matthiasm@0 433 {
matthiasm@0 434 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 435 return ""; // no programs
matthiasm@0 436 }
matthiasm@0 437
matthiasm@0 438 void
matthiasm@0 439 NNLSChroma::selectProgram(string name)
matthiasm@0 440 {
matthiasm@0 441 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 442 }
matthiasm@0 443
matthiasm@0 444
matthiasm@0 445 NNLSChroma::OutputList
matthiasm@0 446 NNLSChroma::getOutputDescriptors() const
matthiasm@0 447 {
matthiasm@0 448 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 449 OutputList list;
matthiasm@0 450
matthiasm@0 451 // Make chroma names for the binNames property
matthiasm@0 452 vector<string> chromanames;
matthiasm@0 453 vector<string> bothchromanames;
matthiasm@0 454 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 455 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 456 if (iNote < 12) {
matthiasm@0 457 chromanames.push_back(notenames[iNote]);
matthiasm@0 458 }
matthiasm@0 459 }
matthiasm@0 460
matthiasm@1 461 // int nNote = 84;
matthiasm@0 462
matthiasm@0 463 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 464 // Every plugin must have at least one output.
matthiasm@0 465
matthiasm@0 466 OutputDescriptor d0;
matthiasm@0 467 d0.identifier = "tuning";
matthiasm@0 468 d0.name = "Tuning";
matthiasm@0 469 d0.description = "The concert pitch.";
matthiasm@0 470 d0.unit = "Hz";
matthiasm@0 471 d0.hasFixedBinCount = true;
matthiasm@0 472 d0.binCount = 0;
matthiasm@0 473 d0.hasKnownExtents = true;
matthiasm@0 474 d0.minValue = 427.47;
matthiasm@0 475 d0.maxValue = 452.89;
matthiasm@0 476 d0.isQuantized = false;
matthiasm@0 477 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 478 d0.hasDuration = false;
matthiasm@0 479 list.push_back(d0);
matthiasm@0 480
matthiasm@0 481 OutputDescriptor d1;
matthiasm@0 482 d1.identifier = "logfreqspec";
matthiasm@0 483 d1.name = "Log-Frequency Spectrum";
matthiasm@0 484 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 485 d1.unit = "";
matthiasm@0 486 d1.hasFixedBinCount = true;
matthiasm@0 487 d1.binCount = nNote;
matthiasm@0 488 d1.hasKnownExtents = false;
matthiasm@0 489 d1.isQuantized = false;
matthiasm@0 490 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 491 d1.hasDuration = false;
matthiasm@0 492 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 493 list.push_back(d1);
matthiasm@0 494
matthiasm@0 495 OutputDescriptor d2;
matthiasm@0 496 d2.identifier = "tunedlogfreqspec";
matthiasm@0 497 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 498 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 499 d2.unit = "";
matthiasm@0 500 d2.hasFixedBinCount = true;
matthiasm@0 501 d2.binCount = 256;
matthiasm@0 502 d2.hasKnownExtents = false;
matthiasm@0 503 d2.isQuantized = false;
matthiasm@0 504 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 505 d2.hasDuration = false;
matthiasm@0 506 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 507 list.push_back(d2);
matthiasm@0 508
matthiasm@0 509 OutputDescriptor d3;
matthiasm@0 510 d3.identifier = "semitonespectrum";
matthiasm@0 511 d3.name = "Semitone Spectrum";
matthiasm@0 512 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 513 d3.unit = "";
matthiasm@0 514 d3.hasFixedBinCount = true;
matthiasm@0 515 d3.binCount = 84;
matthiasm@0 516 d3.hasKnownExtents = false;
matthiasm@0 517 d3.isQuantized = false;
matthiasm@0 518 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 519 d3.hasDuration = false;
matthiasm@0 520 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 521 list.push_back(d3);
matthiasm@0 522
matthiasm@0 523 OutputDescriptor d4;
matthiasm@0 524 d4.identifier = "chroma";
matthiasm@0 525 d4.name = "Chromagram";
matthiasm@0 526 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 527 d4.unit = "";
matthiasm@0 528 d4.hasFixedBinCount = true;
matthiasm@0 529 d4.binCount = 12;
matthiasm@0 530 d4.binNames = chromanames;
matthiasm@0 531 d4.hasKnownExtents = false;
matthiasm@0 532 d4.isQuantized = false;
matthiasm@0 533 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 534 d4.hasDuration = false;
matthiasm@0 535 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 536 list.push_back(d4);
matthiasm@0 537
matthiasm@0 538 OutputDescriptor d5;
matthiasm@0 539 d5.identifier = "basschroma";
matthiasm@0 540 d5.name = "Bass Chromagram";
matthiasm@0 541 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 542 d5.unit = "";
matthiasm@0 543 d5.hasFixedBinCount = true;
matthiasm@0 544 d5.binCount = 12;
matthiasm@0 545 d5.binNames = chromanames;
matthiasm@0 546 d5.hasKnownExtents = false;
matthiasm@0 547 d5.isQuantized = false;
matthiasm@0 548 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 549 d5.hasDuration = false;
matthiasm@0 550 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 551 list.push_back(d5);
matthiasm@0 552
matthiasm@0 553 OutputDescriptor d6;
matthiasm@0 554 d6.identifier = "bothchroma";
matthiasm@0 555 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 556 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 557 d6.unit = "";
matthiasm@0 558 d6.hasFixedBinCount = true;
matthiasm@0 559 d6.binCount = 24;
matthiasm@0 560 d6.binNames = bothchromanames;
matthiasm@0 561 d6.hasKnownExtents = false;
matthiasm@0 562 d6.isQuantized = false;
matthiasm@0 563 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 564 d6.hasDuration = false;
matthiasm@0 565 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 566 list.push_back(d6);
matthiasm@0 567
matthiasm@0 568 OutputDescriptor d7;
matthiasm@0 569 d7.identifier = "simplechord";
matthiasm@0 570 d7.name = "Simple Chord Estimate";
matthiasm@0 571 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 572 d7.unit = "";
matthiasm@0 573 d7.hasFixedBinCount = true;
matthiasm@0 574 d7.binCount = 0;
matthiasm@0 575 d7.hasKnownExtents = false;
matthiasm@0 576 d7.isQuantized = false;
matthiasm@0 577 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 578 d7.hasDuration = false;
matthiasm@0 579 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 580 list.push_back(d7);
matthiasm@0 581
matthiasm@1 582 // OutputDescriptor d8;
matthiasm@1 583 // d8.identifier = "inconsistency";
matthiasm@1 584 // d8.name = "Harmonic inconsistency value";
matthiasm@1 585 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1 586 // d8.unit = "";
matthiasm@1 587 // d8.hasFixedBinCount = true;
matthiasm@1 588 // d8.binCount = 1;
matthiasm@1 589 // d8.hasKnownExtents = false;
matthiasm@1 590 // d8.isQuantized = false;
matthiasm@1 591 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 592 // d8.hasDuration = false;
matthiasm@1 593 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 594 // list.push_back(d8);
matthiasm@1 595 //
matthiasm@1 596 // OutputDescriptor d9;
matthiasm@1 597 // d9.identifier = "inconsistencysegment";
matthiasm@1 598 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1 599 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1 600 // d9.unit = "";
matthiasm@1 601 // d9.hasFixedBinCount = true;
matthiasm@1 602 // d9.binCount = 0;
matthiasm@1 603 // d9.hasKnownExtents = true;
matthiasm@1 604 // d9.minValue = 0.1;
matthiasm@1 605 // d9.maxValue = 0.9;
matthiasm@1 606 // d9.isQuantized = false;
matthiasm@1 607 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1 608 // d9.hasDuration = false;
matthiasm@1 609 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 610 // list.push_back(d9);
matthiasm@1 611 //
matthiasm@1 612 OutputDescriptor d10;
matthiasm@1 613 d10.identifier = "localtuning";
matthiasm@1 614 d10.name = "Local tuning";
matthiasm@1 615 d10.description = "";
matthiasm@1 616 d10.unit = "Hz";
matthiasm@1 617 d10.hasFixedBinCount = true;
matthiasm@1 618 d10.binCount = 1;
matthiasm@1 619 d10.hasKnownExtents = true;
matthiasm@1 620 d10.minValue = 427.47;
matthiasm@1 621 d10.maxValue = 452.89;
matthiasm@1 622 d10.isQuantized = false;
matthiasm@1 623 d10.sampleType = OutputDescriptor::OneSamplePerStep;
matthiasm@1 624 d10.hasDuration = false;
matthiasm@1 625 d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 626 list.push_back(d10);
matthiasm@1 627
matthiasm@0 628 return list;
matthiasm@0 629 }
matthiasm@0 630
matthiasm@0 631
matthiasm@0 632 bool
matthiasm@0 633 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 634 {
matthiasm@1 635 if (debug_on) {
matthiasm@1 636 cerr << "--> initialise";
matthiasm@1 637 }
matthiasm@1 638
matthiasm@0 639 if (channels < getMinChannelCount() ||
matthiasm@0 640 channels > getMaxChannelCount()) return false;
matthiasm@0 641 m_blockSize = blockSize;
matthiasm@0 642 m_stepSize = stepSize;
matthiasm@0 643 frameCount = 0;
matthiasm@0 644 int tempn = 256 * m_blockSize/2;
matthiasm@1 645 cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1 646 float *tempkernel;
matthiasm@1 647
matthiasm@1 648 tempkernel = new float[tempn];
matthiasm@1 649
matthiasm@0 650 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1 651 m_kernelValue.clear();
matthiasm@1 652 m_kernelFftIndex.clear();
matthiasm@1 653 m_kernelNoteIndex.clear();
matthiasm@1 654 int countNonzero = 0;
matthiasm@0 655 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1 656 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1 657 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 658 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0 659 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 660 countNonzero++;
matthiasm@0 661 }
matthiasm@1 662 m_kernelFftIndex.push_back(iFFT);
matthiasm@1 663 m_kernelNoteIndex.push_back(iNote);
matthiasm@0 664 }
matthiasm@0 665 }
matthiasm@1 666 }
matthiasm@1 667 cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1 668 delete [] tempkernel;
matthiasm@1 669
matthiasm@1 670
matthiasm@0 671 return true;
matthiasm@0 672 }
matthiasm@0 673
matthiasm@0 674 void
matthiasm@0 675 NNLSChroma::reset()
matthiasm@0 676 {
matthiasm@0 677 if (debug_on) cerr << "--> reset";
matthiasm@0 678 // Clear buffers, reset stored values, etc
matthiasm@0 679 frameCount = 0;
matthiasm@0 680 m_dictID = 0;
matthiasm@1 681 m_kernelValue.clear();
matthiasm@1 682 m_kernelFftIndex.clear();
matthiasm@1 683 m_kernelNoteIndex.clear();
matthiasm@0 684 }
matthiasm@0 685
matthiasm@0 686 NNLSChroma::FeatureSet
matthiasm@0 687 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 688 {
matthiasm@0 689 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 690 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix
matthiasm@0 691
matthiasm@0 692 frameCount++;
matthiasm@0 693 float *magnitude = new float[m_blockSize/2];
matthiasm@0 694
matthiasm@0 695 Feature f10; // local tuning
matthiasm@0 696
matthiasm@0 697 const float *fbuf = inputBuffers[0];
matthiasm@0 698
matthiasm@0 699 // make magnitude
matthiasm@0 700 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0 701 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0 702 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@1 703 // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0;
matthiasm@0 704 }
matthiasm@0 705
matthiasm@0 706
matthiasm@0 707 // note magnitude mapping using pre-calculated matrix
matthiasm@0 708 float *nm = new float[nNote]; // note magnitude
matthiasm@0 709 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 710 nm[iNote] = 0; // initialise as 0
matthiasm@0 711 }
matthiasm@0 712 int binCount = 0;
matthiasm@0 713 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0 714 // cerr << ".";
matthiasm@1 715 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1 716 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0 717 binCount++;
matthiasm@0 718 }
matthiasm@1 719 // cerr << nm[20];
matthiasm@1 720 // cerr << endl;
matthiasm@0 721
matthiasm@0 722
matthiasm@0 723 float one_over_N = 1.0/frameCount;
matthiasm@0 724 // update means of complex tuning variables
matthiasm@0 725 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 726 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 727 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 728
matthiasm@0 729 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 730 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 731 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 732 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@0 733 m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0];
matthiasm@0 734 m_localTuning1 *= 0.99994; m_localTuning1 += nm[iTone + 1];
matthiasm@0 735 m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2];
matthiasm@0 736 }
matthiasm@0 737
matthiasm@0 738 // if (m_tuneLocal) {
matthiasm@0 739 // local tuning
matthiasm@0 740 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0 741 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0 742 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0 743 m_localTuning.push_back(normalisedtuning);
matthiasm@0 744 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0 745 f10.values.push_back(tuning440);
matthiasm@0 746 // }
matthiasm@0 747
matthiasm@0 748 Feature f1; // logfreqspec
matthiasm@0 749 f1.hasTimestamp = true;
matthiasm@0 750 f1.timestamp = timestamp;
matthiasm@0 751 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 752 f1.values.push_back(nm[iNote]);
matthiasm@0 753 }
matthiasm@0 754
matthiasm@0 755 FeatureSet fs;
matthiasm@0 756 fs[1].push_back(f1);
matthiasm@0 757 fs[10].push_back(f10);
matthiasm@0 758
matthiasm@0 759 // deletes
matthiasm@0 760 delete[] magnitude;
matthiasm@0 761 delete[] nm;
matthiasm@0 762
matthiasm@0 763 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@0 764 return fs;
matthiasm@0 765 }
matthiasm@0 766
matthiasm@0 767 NNLSChroma::FeatureSet
matthiasm@0 768 NNLSChroma::getRemainingFeatures()
matthiasm@0 769 {
matthiasm@0 770 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@0 771 FeatureSet fsOut;
matthiasm@0 772 //
matthiasm@1 773 /** Calculate Tuning
matthiasm@1 774 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1 775 cumulative mean real and imag values)
matthiasm@1 776 **/
matthiasm@1 777 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1 778 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1 779 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1 780 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1 781 int intShift = floor(normalisedtuning * 3);
matthiasm@1 782 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 783
matthiasm@1 784 char buffer0 [50];
matthiasm@1 785
matthiasm@1 786 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 787
matthiasm@1 788 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 789
matthiasm@1 790 // push tuning to FeatureSet fsOut
matthiasm@1 791 Feature f0; // tuning
matthiasm@1 792 f0.hasTimestamp = true;
matthiasm@1 793 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1 794 f0.label = buffer0;
matthiasm@1 795 fsOut[0].push_back(f0);
matthiasm@1 796
matthiasm@1 797 /** Tune Log-Frequency Spectrogram
matthiasm@1 798 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1 799 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@1 800 **/
matthiasm@1 801
matthiasm@1 802 float tempValue = 0;
matthiasm@1 803 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1 804 float thresh = pow(10,dbThreshold/20);
matthiasm@1 805 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1 806 int count = 0;
matthiasm@1 807
matthiasm@1 808 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1 809 Feature f1 = *i;
matthiasm@1 810 Feature f2; // tuned log-frequency spectrum
matthiasm@1 811 f2.hasTimestamp = true;
matthiasm@1 812 f2.timestamp = f1.timestamp;
matthiasm@1 813 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 814
matthiasm@1 815 if (m_tuneLocal) {
matthiasm@1 816 intShift = floor(m_localTuning[count] * 3);
matthiasm@1 817 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 818 }
matthiasm@1 819
matthiasm@1 820 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 821
matthiasm@1 822 for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1 823 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1 824 f2.values.push_back(tempValue);
matthiasm@1 825 }
matthiasm@1 826
matthiasm@1 827 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1 828 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1 829 vector<float> runningstd;
matthiasm@1 830 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1 831 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1 832 }
matthiasm@1 833 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1 834 for (int i = 0; i < 256; i++) {
matthiasm@1 835 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1 836 if (runningstd[i] > 0) {
matthiasm@1 837 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1 838 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 839 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1 840 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 841 }
matthiasm@1 842 if (f2.values[i] < 0) {
matthiasm@1 843 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1 844 }
matthiasm@1 845 }
matthiasm@1 846 fsOut[2].push_back(f2);
matthiasm@1 847 count++;
matthiasm@1 848 }
matthiasm@1 849
matthiasm@1 850 /** Semitone spectrum and chromagrams
matthiasm@1 851 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1 852 is inferred using a non-negative least squares algorithm.
matthiasm@1 853 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1 854 bass and treble stacked onto each other).
matthiasm@1 855 **/
matthiasm@1 856 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
matthiasm@1 857
matthiasm@1 858 vector<vector<float> > chordogram;
matthiasm@1 859 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1 860 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1 861 count = 0;
matthiasm@1 862
matthiasm@1 863 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1 864 Feature f2 = *it; // logfreq spectrum
matthiasm@1 865 Feature f3; // semitone spectrum
matthiasm@1 866 Feature f4; // treble chromagram
matthiasm@1 867 Feature f5; // bass chromagram
matthiasm@1 868 Feature f6; // treble and bass chromagram
matthiasm@1 869
matthiasm@1 870 f3.hasTimestamp = true;
matthiasm@1 871 f3.timestamp = f2.timestamp;
matthiasm@1 872
matthiasm@1 873 f4.hasTimestamp = true;
matthiasm@1 874 f4.timestamp = f2.timestamp;
matthiasm@1 875
matthiasm@1 876 f5.hasTimestamp = true;
matthiasm@1 877 f5.timestamp = f2.timestamp;
matthiasm@1 878
matthiasm@1 879 f6.hasTimestamp = true;
matthiasm@1 880 f6.timestamp = f2.timestamp;
matthiasm@1 881
matthiasm@1 882 double b[256];
matthiasm@1 883
matthiasm@1 884 bool some_b_greater_zero = false;
matthiasm@1 885 for (int i = 0; i < 256; i++) {
matthiasm@1 886 b[i] = f2.values[i];
matthiasm@1 887 if (b[i] > 0) {
matthiasm@1 888 some_b_greater_zero = true;
matthiasm@1 889 }
matthiasm@1 890 }
matthiasm@1 891
matthiasm@1 892 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 893
matthiasm@1 894 vector<float> chroma = vector<float>(12, 0);
matthiasm@1 895 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1 896 float currval;
matthiasm@1 897 unsigned iSemitone = 0;
matthiasm@1 898
matthiasm@1 899 if (some_b_greater_zero) {
matthiasm@1 900 if (m_dictID == 0) {
matthiasm@1 901 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1 902 currval = 0;
matthiasm@1 903 for (unsigned iBin = 0; iBin < 3; ++iBin) {
matthiasm@1 904 currval += b[iNote + iBin];
matthiasm@1 905 }
matthiasm@1 906 f3.values.push_back(currval);
matthiasm@1 907 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1 908 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1 909 iSemitone++;
matthiasm@1 910 }
matthiasm@1 911
matthiasm@1 912 } else {
matthiasm@1 913 double x[84+1] = {1.0};
matthiasm@1 914 double rnorm;
matthiasm@1 915 double w[84+1];
matthiasm@1 916 double zz[84+1];
matthiasm@1 917 int indx[84+2];
matthiasm@1 918 int mode;
matthiasm@1 919
matthiasm@1 920 nnls(m_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
matthiasm@1 921 }
matthiasm@1 922 }
matthiasm@1 923
matthiasm@1 924 f4.values = chroma;
matthiasm@1 925 f5.values = basschroma;
matthiasm@1 926 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1 927 f6.values = chroma;
matthiasm@1 928
matthiasm@1 929 // local chord estimation
matthiasm@1 930 vector<float> currentChordSalience;
matthiasm@1 931 float tempchordvalue = 0;
matthiasm@1 932 float sumchordvalue = 0;
matthiasm@1 933 int nChord = nChorddict / 24;
matthiasm@1 934 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 935 tempchordvalue = 0;
matthiasm@1 936 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@1 937 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 938 }
matthiasm@1 939 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@1 940 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 941 }
matthiasm@1 942 sumchordvalue+=tempchordvalue;
matthiasm@1 943 currentChordSalience.push_back(tempchordvalue);
matthiasm@1 944 }
matthiasm@1 945 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 946 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1 947 }
matthiasm@1 948 chordogram.push_back(currentChordSalience);
matthiasm@1 949
matthiasm@1 950 fsOut[3].push_back(f3);
matthiasm@1 951 fsOut[4].push_back(f4);
matthiasm@1 952 fsOut[5].push_back(f5);
matthiasm@1 953 fsOut[6].push_back(f6);
matthiasm@1 954 // if (x) free(x);
matthiasm@1 955 // delete[] b;
matthiasm@1 956 count++;
matthiasm@1 957 }
matthiasm@0 958 // // cerr << m_stepSize << endl<< endl;
matthiasm@0 959 // count = 0;
matthiasm@0 960 // int kernelwidth = (49 * 2048) / m_stepSize;
matthiasm@0 961 // int nChord = nChorddict / 24;
matthiasm@0 962 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
matthiasm@0 963 //
matthiasm@0 964 // /* Simple chord estimation
matthiasm@0 965 // I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@0 966 // take the maximum. Very simple, don't do this at home...
matthiasm@0 967 // */
matthiasm@0 968 // vector<int> chordSequence;
matthiasm@0 969 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@0 970 //
matthiasm@0 971 // int startIndex = max(count - kernelwidth/2 + 1,0);
matthiasm@0 972 // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1);
matthiasm@0 973 // vector<float> temp = vector<float>(nChord,0);
matthiasm@0 974 // for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@0 975 // float val = 0;
matthiasm@0 976 // for (int i = startIndex; i < endIndex; i++) {
matthiasm@0 977 // val += chordogram[i][iChord] *
matthiasm@0 978 // (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weigthed sum (triangular window)
matthiasm@0 979 // }
matthiasm@0 980 // temp[iChord] = val; // sum
matthiasm@0 981 // }
matthiasm@0 982 //
matthiasm@0 983 // // get maximum for "chord estimate"
matthiasm@0 984 //
matthiasm@0 985 // float bestChordValue = 0;
matthiasm@0 986 // int bestChordIndex = nChord-1; // "no chord" is default
matthiasm@0 987 // for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@0 988 // if (temp[iChord] > bestChordValue) {
matthiasm@0 989 // bestChordValue = temp[iChord];
matthiasm@0 990 // bestChordIndex = iChord;
matthiasm@0 991 // }
matthiasm@0 992 // }
matthiasm@0 993 // // cerr << bestChordIndex << endl;
matthiasm@0 994 // chordSequence.push_back(bestChordIndex);
matthiasm@0 995 // count++;
matthiasm@0 996 // }
matthiasm@0 997 // // mode filter on chordSequence
matthiasm@0 998 // count = 0;
matthiasm@0 999 // int oldChordIndex = -1;
matthiasm@0 1000 // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@0 1001 // Feature f6 = *it;
matthiasm@0 1002 // Feature f7; // chord estimate
matthiasm@0 1003 //
matthiasm@0 1004 // f7.hasTimestamp = true;
matthiasm@0 1005 // f7.timestamp = f6.timestamp;
matthiasm@0 1006 //
matthiasm@0 1007 // vector<int> chordCount = vector<int>(121,0);
matthiasm@0 1008 //
matthiasm@0 1009 // int maxChordCount = 0;
matthiasm@0 1010 // int maxChordIndex = 120;
matthiasm@0 1011 // int startIndex = max(count - kernelwidth/2,0);
matthiasm@0 1012 // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1);
matthiasm@0 1013 // for (int i = startIndex; i < endIndex; i++) {
matthiasm@0 1014 // chordCount[chordSequence[i]]++;
matthiasm@0 1015 // if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@0 1016 // maxChordCount++;
matthiasm@0 1017 // maxChordIndex = chordSequence[i];
matthiasm@0 1018 // }
matthiasm@0 1019 // }
matthiasm@0 1020 // if (oldChordIndex != maxChordIndex) {
matthiasm@0 1021 // oldChordIndex = maxChordIndex;
matthiasm@0 1022 //
matthiasm@0 1023 // char buffer1 [50];
matthiasm@0 1024 // if (maxChordIndex < nChord - 1) {
matthiasm@0 1025 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@0 1026 // } else {
matthiasm@0 1027 // sprintf(buffer1, "N");
matthiasm@0 1028 // }
matthiasm@0 1029 // f7.label = buffer1;
matthiasm@0 1030 // fsOut[7].push_back(f7);
matthiasm@0 1031 // }
matthiasm@0 1032 // count++;
matthiasm@0 1033 // }
matthiasm@0 1034 // // musicity
matthiasm@0 1035 // count = 0;
matthiasm@0 1036 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0 1037 // vector<float> musicityValue;
matthiasm@0 1038 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1039 // Feature f4 = *it;
matthiasm@0 1040 //
matthiasm@0 1041 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1042 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1043 // float chromasum = 0;
matthiasm@0 1044 // float diffsum = 0;
matthiasm@0 1045 // for (int k = 0; k < 12; k++) {
matthiasm@0 1046 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0 1047 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0 1048 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0 1049 // }
matthiasm@0 1050 // }
matthiasm@0 1051 // diffsum /= chromasum;
matthiasm@0 1052 // musicityValue.push_back(diffsum);
matthiasm@0 1053 // count++;
matthiasm@0 1054 // }
matthiasm@0 1055 //
matthiasm@0 1056 // float musicityThreshold = 0.44;
matthiasm@0 1057 // if (m_stepSize == 4096) {
matthiasm@0 1058 // musicityThreshold = 0.74;
matthiasm@0 1059 // }
matthiasm@0 1060 // if (m_stepSize == 4410) {
matthiasm@0 1061 // musicityThreshold = 0.77;
matthiasm@0 1062 // }
matthiasm@0 1063 //
matthiasm@0 1064 // count = 0;
matthiasm@0 1065 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1066 // Feature f4 = *it;
matthiasm@0 1067 // Feature f8; // musicity
matthiasm@0 1068 // Feature f9; // musicity segmenter
matthiasm@0 1069 //
matthiasm@0 1070 // f8.hasTimestamp = true;
matthiasm@0 1071 // f8.timestamp = f4.timestamp;
matthiasm@0 1072 // f9.hasTimestamp = true;
matthiasm@0 1073 // f9.timestamp = f4.timestamp;
matthiasm@0 1074 //
matthiasm@0 1075 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1076 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1077 // int musicityCount = 0;
matthiasm@0 1078 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0 1079 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0 1080 // }
matthiasm@0 1081 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0 1082 //
matthiasm@0 1083 // if (isSpeech) {
matthiasm@0 1084 // if (oldlabeltype != 2) {
matthiasm@0 1085 // f9.label = "Speech";
matthiasm@0 1086 // fsOut[9].push_back(f9);
matthiasm@0 1087 // oldlabeltype = 2;
matthiasm@0 1088 // }
matthiasm@0 1089 // } else {
matthiasm@0 1090 // if (oldlabeltype != 1) {
matthiasm@0 1091 // f9.label = "Music";
matthiasm@0 1092 // fsOut[9].push_back(f9);
matthiasm@0 1093 // oldlabeltype = 1;
matthiasm@0 1094 // }
matthiasm@0 1095 // }
matthiasm@0 1096 // f8.values.push_back(musicityValue[count]);
matthiasm@0 1097 // fsOut[8].push_back(f8);
matthiasm@0 1098 // count++;
matthiasm@0 1099 // }
matthiasm@0 1100 return fsOut;
matthiasm@0 1101
matthiasm@0 1102 }
matthiasm@0 1103