annotate chromamethods.cpp @ 35:cf8898a0174c matthiasm-plugin

* Split out NNLSChroma plugin into three plugins (chroma, chordino, tuning) with a common base class. There's still quite a lot of duplication between the getRemainingFeatures functions. Also add copyright / copying headers, etc.
author Chris Cannam
date Fri, 22 Oct 2010 11:30:21 +0100
parents 608b0c8ad3f8
children d6bb9b43ac1c
rev   line source
Chris@35 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@35 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@27 19 #include "chromamethods.h"
Chris@27 20
Chris@27 21 #include <cmath>
Chris@27 22 #include <list>
Chris@27 23 #include <iostream>
Chris@27 24 #include <fstream>
Chris@27 25 #include <sstream>
Chris@27 26 #include <cassert>
Chris@27 27 #include <cstdlib>
Chris@27 28 #include <cstdio>
Chris@27 29 #include <boost/tokenizer.hpp>
Chris@27 30 #include <boost/iostreams/device/file.hpp>
Chris@27 31 #include <boost/iostreams/stream.hpp>
Chris@27 32 #include <boost/lexical_cast.hpp>
Chris@27 33
Chris@27 34 #include "chorddict.cpp"
Chris@27 35
Chris@27 36 using namespace std;
Chris@27 37 using namespace boost;
Chris@27 38
Chris@27 39
/** Special Convolution
    Convolves the first argument (the convolvee) with an odd-length kernel.
    In the valid core part of the result, i.e. where the kernel overlaps the
    convolvee completely, the output holds the usual convolution values,
    centred on the kernel. The lenKernel/2 bins at the beginning (end) of the
    convolvee range are padded with the first (last) valid convolution value.

    The returned vector has max(256, convolvee.size()) elements: 256 is the
    historical fixed output length and is kept for inputs up to that size
    (unused trailing bins stay 0), while longer inputs get an output as long
    as the input instead of overrunning the buffer.

    If the convolvee is shorter than the kernel (or the kernel is empty)
    there is no valid region and the result is all zeros.

    The kernel length must be odd; this is only checked with assert().
**/

std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    // historical minimum output length, preserved for existing callers
    const int minLength = 256;
    std::vector<float> Z(lenConvolvee > minLength ? lenConvolvee : minLength, 0);
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // no bin where the kernel fits entirely inside the convolvee:
    // nothing to compute and nothing to replicate into the pads
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: output bin (n - halfKernel) is centred under the kernel
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n-m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
Chris@27 73
Chris@27 74 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
Chris@27 75 // {
Chris@27 76 // vector<float> freq(binnumbers.size, 0.0);
Chris@27 77 // for (unsigned i = 0; i < binnumbers.size; ++i) {
Chris@27 78 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
Chris@27 79 // }
Chris@27 80 // return freq;
Chris@27 81 // }
Chris@27 82
/** Raised-cosine pulse.
    Returns 0.5 + 0.5 * cos(2 * pi * (x - centre) / width) for x within half
    a width of centre (1 at the centre, falling to 0 at centre +/- width/2),
    and 0 everywhere else. A width that is not strictly positive yields 0.
**/
float cospuls(float x, float centre, float width)
{
    // a non-positive (or NaN) width has no support; this also avoids the
    // 0 * inf = NaN the formula would produce for width == 0 at x == centre
    if (!(width > 0)) {
        return 0.0;
    }
    // std::fabs rather than unqualified abs: the latter can bind to the C
    // integer abs() and silently truncate the distance
    if (std::fabs(x - centre) <= 0.5 * width) {
        const float recipwidth = 1.0/width;
        return std::cos((x-centre)*2*M_PI*recipwidth)*.5+.5;
    }
    return 0.0;
}
Chris@27 91
Chris@27 92 float pitchCospuls(float x, float centre, int binsperoctave)
Chris@27 93 {
Chris@27 94 float warpedf = -binsperoctave * (log2(centre) - log2(x));
Chris@27 95 float out = cospuls(warpedf, 0.0, 2.0);
Chris@27 96 // now scale to correct for note density
Chris@27 97 float c = log(2.0)/binsperoctave;
Chris@27 98 if (x > 0) {
Chris@27 99 out = out / (c * x);
Chris@27 100 } else {
Chris@27 101 out = 0;
Chris@27 102 }
Chris@27 103 return out;
Chris@27 104 }
Chris@27 105
Chris@27 106 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
Chris@27 107
Chris@27 108 int binspersemitone = 3; // this must be 3
Chris@27 109 int minoctave = 0; // this must be 0
Chris@27 110 int maxoctave = 7; // this must be 7
Chris@27 111 int oversampling = 80;
Chris@27 112
Chris@27 113 // linear frequency vector
Chris@27 114 vector<float> fft_f;
Chris@27 115 for (int i = 0; i < blocksize/2; ++i) {
Chris@27 116 fft_f.push_back(i * (fs * 1.0 / blocksize));
Chris@27 117 }
Chris@27 118 float fft_width = fs * 2.0 / blocksize;
Chris@27 119
Chris@27 120 // linear oversampled frequency vector
Chris@27 121 vector<float> oversampled_f;
Chris@27 122 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
Chris@27 123 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
Chris@27 124 }
Chris@27 125
Chris@27 126 // pitch-spaced frequency vector
Chris@27 127 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
Chris@27 128 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
Chris@27 129 vector<float> cq_f;
Chris@27 130 float oob = 1.0/binspersemitone; // one over binspersemitone
Chris@27 131 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
Chris@27 132 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
Chris@27 133 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
Chris@27 134 for (int k = -1; k < 2; ++k) {
Chris@27 135 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
Chris@27 136 }
Chris@27 137 }
Chris@27 138 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
Chris@27 139 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
Chris@27 140
Chris@27 141 int nFFT = fft_f.size();
Chris@27 142
Chris@27 143 vector<float> fft_activation;
Chris@27 144 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
Chris@27 145 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
Chris@27 146 fft_activation.push_back(cosp);
Chris@27 147 // cerr << cosp << endl;
Chris@27 148 }
Chris@27 149
Chris@27 150 float cq_activation;
Chris@27 151 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
Chris@27 152 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
Chris@27 153 int curr_start = oversampling * iFFT - oversampling;
Chris@27 154 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
Chris@27 155 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
Chris@27 156 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
Chris@27 157 outmatrix[iFFT + nFFT * iCQ] = 0;
Chris@27 158 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
Chris@27 159 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
Chris@27 160 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
Chris@27 161 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
Chris@27 162 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
Chris@27 163 }
Chris@27 164 // if (iCQ == 1 || iCQ == 2) {
Chris@27 165 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
Chris@27 166 // }
Chris@27 167 }
Chris@27 168 }
Chris@27 169 }
Chris@27 170 return true;
Chris@27 171 }
Chris@27 172
Chris@27 173 void dictionaryMatrix(float* dm) {
Chris@27 174 int binspersemitone = 3; // this must be 3
Chris@27 175 int minoctave = 0; // this must be 0
Chris@27 176 int maxoctave = 7; // this must be 7
Chris@27 177 float s_param = 0.7;
Chris@27 178
Chris@27 179 // pitch-spaced frequency vector
Chris@27 180 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
Chris@27 181 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
Chris@27 182 vector<float> cq_f;
Chris@27 183 float oob = 1.0/binspersemitone; // one over binspersemitone
Chris@27 184 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
Chris@27 185 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
Chris@27 186 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
Chris@27 187 for (int k = -1; k < 2; ++k) {
Chris@27 188 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
Chris@27 189 }
Chris@27 190 }
Chris@27 191 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
Chris@27 192 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
Chris@27 193
Chris@27 194 float curr_f;
Chris@27 195 float floatbin;
Chris@27 196 float curr_amp;
Chris@27 197 // now for every combination calculate the matrix element
Chris@27 198 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
Chris@27 199 // cerr << iOut << endl;
Chris@27 200 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
Chris@27 201 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
Chris@27 202 // if (curr_f > cq_f[nNote-1]) break;
Chris@27 203 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
Chris@27 204 // cerr << floatbin << endl;
Chris@27 205 curr_amp = pow(s_param,float(iHarm-1));
Chris@27 206 // cerr << "curramp" << curr_amp << endl;
Chris@27 207 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
Chris@27 208 if (abs(iNote+1.0-floatbin)<2) {
Chris@27 209 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
Chris@27 210 // dm[iNote + nNote * iOut] += 1 * curr_amp;
Chris@27 211 }
Chris@27 212 }
Chris@27 213 }
Chris@27 214 }
Chris@27 215
Chris@27 216
Chris@27 217 }
Chris@27 218
/** Vamp plugin search path.
    Returns the list of directories named by the VAMP_PATH environment
    variable, split at the platform path separator (';' on Windows, ':'
    elsewhere). If VAMP_PATH is unset or empty, a platform default is used
    instead, with "$HOME" (and on Windows "%ProgramFiles%") replaced by the
    value of the corresponding environment variable where available.
    Empty entries (e.g. from a trailing separator) are returned as empty
    strings; the result always has at least one element.
**/
static
std::vector<std::string>
getPluginPath()
{
    //!!! This is duplicated from PluginHostAdapter::getPluginPath,
    //!!! which is not available to us in the plugin (only to the
    //!!! host)

    std::vector<std::string> path;
    std::string envPath;

    const char *cpath = std::getenv("VAMP_PATH");
    if (cpath) envPath = cpath;

#ifdef _WIN32
#define PATH_SEPARATOR ';'
#define DEFAULT_VAMP_PATH "%ProgramFiles%\\Vamp Plugins"
#else
#define PATH_SEPARATOR ':'
#ifdef __APPLE__
#define DEFAULT_VAMP_PATH "$HOME/Library/Audio/Plug-Ins/Vamp:/Library/Audio/Plug-Ins/Vamp"
#else
#define DEFAULT_VAMP_PATH "$HOME/vamp:$HOME/.vamp:/usr/local/lib/vamp:/usr/lib/vamp"
#endif
#endif

    if (envPath == "") {
        envPath = DEFAULT_VAMP_PATH;
        const char *chome = std::getenv("HOME");
        if (chome) {
            const std::string home(chome);
            std::string::size_type f = 0;
            while ((f = envPath.find("$HOME", f)) != std::string::npos) {
                envPath.replace(f, 5, home);
                // resume after the inserted text, so that a home directory
                // which itself contains "$HOME" cannot loop forever
                f += home.length();
            }
        }
#ifdef _WIN32
        const char *cpfiles = std::getenv("ProgramFiles");
        if (!cpfiles) cpfiles = "C:\\Program Files";
        const std::string pfiles(cpfiles);
        std::string::size_type f = 0;
        while ((f = envPath.find("%ProgramFiles%", f)) != std::string::npos) {
            envPath.replace(f, 14, pfiles);
            // as above: never rescan the text that was just inserted
            f += pfiles.length();
        }
#endif
    }

    // split at every separator; whatever follows the last one (possibly
    // the whole string) is the final entry
    std::string::size_type index = 0, newindex = 0;

    while ((newindex = envPath.find(PATH_SEPARATOR, index)) != std::string::npos) {
        path.push_back(envPath.substr(index, newindex - index));
        index = newindex + 1;
    }

    path.push_back(envPath.substr(index));

    return path;
}
Chris@27 279
Chris@27 280 vector<string> chordDictionary(vector<float> *mchorddict) {
Chris@30 281
Chris@27 282 typedef tokenizer<char_separator<char> > Tok;
Chris@27 283 char_separator<char> sep(",; ","=");
Chris@30 284
Chris@30 285 string chordDictBase("chord.dict");
Chris@30 286 string chordDictFilename;
Chris@30 287
Chris@30 288 vector<string> ppath = getPluginPath();
Chris@30 289 for (int i = 0; i < ppath.size(); ++i) {
Chris@30 290 chordDictFilename = ppath[i] + "/" + chordDictBase;
Chris@30 291 cerr << "Looking for chord.dict in " << chordDictFilename << "..." << endl;
Chris@30 292 if (iostreams::stream<iostreams::file_source>(chordDictFilename.c_str())
Chris@30 293 .is_open()) {
Chris@30 294 cerr << "(Success)" << endl;
Chris@30 295 break;
Chris@30 296 }
Chris@30 297 }
Chris@30 298
Chris@30 299 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename);
Chris@27 300 string line;
Chris@27 301 int iElement = 0;
Chris@27 302 int nChord = 0;
Chris@27 303
Chris@27 304 vector<string> loadedChordNames;
Chris@27 305 vector<float> loadedChordDict;
Chris@27 306 if (chordDictFile.is_open()) {
Chris@27 307 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
Chris@27 308 // first, get the chord definition
Chris@27 309 string chordType;
Chris@27 310 vector<float> tempPCVector;
Chris@27 311 // cerr << line << endl;
Chris@27 312 if (!line.empty() && line.substr(0,1) != "#") {
Chris@27 313 Tok tok(line, sep);
Chris@27 314 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
Chris@27 315 string tempString = *tok_iter;
Chris@27 316 // cerr << tempString << endl;
Chris@27 317 if (tok_iter == tok.begin()) { // either the chord name or a colon
Chris@27 318 if (tempString == "=") {
Chris@27 319 chordType = "";
Chris@27 320 } else {
Chris@27 321 chordType = tempString;
Chris@27 322 tok_iter++; // is this cheating ? :)
Chris@27 323 }
Chris@27 324 } else {
Chris@27 325 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
Chris@27 326 }
Chris@27 327 }
Chris@27 328
Chris@27 329 // now make all 12 chords of every type
Chris@27 330 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
Chris@27 331 // add bass slash notation
Chris@27 332 string slashNotation = "";
Chris@27 333 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
Chris@27 334 if (tempPCVector[(kSemitone) % 12] > 0.99) {
Chris@27 335 slashNotation = bassnames[iSemitone][kSemitone];
Chris@27 336 }
Chris@27 337 }
Chris@27 338 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
Chris@27 339 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
Chris@27 340 float bassValue = 0;
Chris@27 341 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
Chris@27 342 bassValue = 1;
Chris@27 343 } else {
Chris@27 344 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
Chris@27 345 }
Chris@27 346 loadedChordDict.push_back(bassValue);
Chris@27 347 }
Chris@27 348 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
Chris@27 349 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
Chris@27 350 }
Chris@27 351 ostringstream os;
Chris@27 352 if (slashNotation.empty()) {
Chris@27 353 os << notenames[12+iSemitone] << chordType;
Chris@27 354 } else {
Chris@27 355 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
Chris@27 356 }
Chris@27 357 // cerr << os.str() << endl;
Chris@27 358 loadedChordNames.push_back(os.str());
Chris@27 359 }
Chris@27 360 }
Chris@27 361 }
Chris@27 362 // N type
Chris@27 363 loadedChordNames.push_back("N");
Chris@27 364 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
Chris@27 365 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
Chris@27 366
Chris@27 367 // normalise
Chris@27 368 float sum = 0;
Chris@27 369 for (int i = 0; i < loadedChordDict.size(); i++) {
Chris@27 370 sum += pow(loadedChordDict[i],2);
Chris@27 371 if (i % 24 == 23) {
Chris@27 372 float invertedsum = 1.0/sqrt(sum);
Chris@27 373 for (int k = 0; k < 24; k++) {
Chris@27 374 loadedChordDict[i-k] *= invertedsum;
Chris@27 375 }
Chris@27 376 sum = 0;
Chris@27 377 }
Chris@27 378
Chris@27 379 }
Chris@27 380
Chris@27 381
Chris@27 382 nChord = 0;
Chris@27 383 for (int i = 0; i < loadedChordNames.size(); i++) {
Chris@27 384 nChord++;
Chris@27 385 }
Chris@27 386 chordDictFile.close();
Chris@27 387
Chris@27 388
Chris@27 389 // mchorddict = new float[nChord*24];
Chris@27 390 for (int i = 0; i < nChord*24; i++) {
Chris@27 391 mchorddict->push_back(loadedChordDict[i]);
Chris@27 392 }
Chris@27 393
Chris@27 394 } else {// use default from chorddict.cpp
Chris@27 395 // mchorddict = new float[nChorddict];
Chris@27 396 for (int i = 0; i < nChorddict; i++) {
Chris@27 397 mchorddict->push_back(chorddict[i]);
Chris@27 398 }
Chris@27 399
Chris@27 400 nChord = nChorddict/24;
Chris@27 401 // mchordnames = new string[nChorddict/24];
Chris@27 402 char buffer1 [50];
Chris@27 403 for (int i = 0; i < nChorddict/24; i++) {
Chris@27 404 if (i < nChorddict/24 - 1) {
Chris@27 405 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
Chris@27 406 } else {
Chris@27 407 sprintf(buffer1, "N");
Chris@27 408 }
Chris@27 409 ostringstream os;
Chris@27 410 os << buffer1;
Chris@27 411 loadedChordNames.push_back(os.str());
Chris@27 412
Chris@27 413 }
Chris@27 414
Chris@27 415 }
Chris@27 416 // cerr << "before leaving" << chordnames[1] << endl;
Chris@27 417 return loadedChordNames;
Chris@27 418 }