Chris@35: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@35: Chris@35: /* Chris@35: NNLS-Chroma / Chordino Chris@35: Chris@35: Audio feature extraction plugins for chromagram and chord Chris@35: estimation. Chris@35: Chris@35: Centre for Digital Music, Queen Mary University of London. Chris@35: This file copyright 2008-2010 Matthias Mauch and QMUL. Chris@35: Chris@35: This program is free software; you can redistribute it and/or Chris@35: modify it under the terms of the GNU General Public License as Chris@35: published by the Free Software Foundation; either version 2 of the Chris@35: License, or (at your option) any later version. See the file Chris@35: COPYING included with this distribution for more information. Chris@35: */ Chris@35: Chris@27: #include "chromamethods.h" Chris@27: Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: #include Chris@27: Chris@27: #include "chorddict.cpp" Chris@27: Chris@27: using namespace std; Chris@27: using namespace boost; Chris@27: Chris@27: Chris@27: /** Special Convolution Chris@27: special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the Chris@27: convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values Chris@27: as the first (last) valid convolution bin. Chris@27: **/ Chris@27: Chris@27: vector SpecialConvolution(vector convolvee, vector kernel) Chris@27: { Chris@27: float s; Chris@27: int m, n; Chris@27: int lenConvolvee = convolvee.size(); Chris@27: int lenKernel = kernel.size(); Chris@27: Chris@27: vector Z(256,0); Chris@27: assert(lenKernel % 2 != 0); // no exception handling !!! Chris@27: Chris@27: for (n = lenKernel - 1; n < lenConvolvee; n++) { Chris@27: s=0.0; Chris@27: for (m = 0; m < lenKernel; m++) { Chris@27: // cerr << "m = " << m << ", n = " << n << ", n-m = " << (n-m) << '\n'; Chris@27: s += convolvee[n-m] * kernel[m]; Chris@27: // if (debug_on) cerr << "--> s = " << s << '\n'; Chris@27: } Chris@27: // cerr << n - lenKernel/2 << endl; Chris@27: Z[n -lenKernel/2] = s; Chris@27: } Chris@27: Chris@27: // fill upper and lower pads Chris@27: for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; Chris@27: for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = Chris@27: Z[lenConvolvee - lenKernel/2 - 1]; Chris@27: return Z; Chris@27: } Chris@27: Chris@27: // vector FftBin2Frequency(vector binnumbers, int fs, int blocksize) Chris@27: // { Chris@27: // vector freq(binnumbers.size, 0.0); Chris@27: // for (unsigned i = 0; i < binnumbers.size; ++i) { Chris@27: // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize; Chris@27: // } Chris@27: // return freq; Chris@27: // } Chris@27: Chris@27: float cospuls(float x, float centre, float width) Chris@27: { Chris@27: float recipwidth = 1.0/width; Chris@27: if (abs(x - centre) <= 0.5 * width) { Chris@27: return cos((x-centre)*2*M_PI*recipwidth)*.5+.5; Chris@27: } Chris@27: return 0.0; Chris@27: } Chris@27: Chris@27: float pitchCospuls(float x, float centre, int binsperoctave) Chris@27: { Chris@27: float warpedf = -binsperoctave * (log2(centre) - log2(x)); Chris@27: float out = cospuls(warpedf, 0.0, 2.0); Chris@27: // now scale to correct for note density Chris@27: float c = log(2.0)/binsperoctave; Chris@27: if (x > 0) { Chris@27: out = out / (c * x); Chris@27: } else { Chris@27: out = 0; Chris@27: } Chris@27: return out; Chris@27: } Chris@27: Chris@27: bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { Chris@27: Chris@27: int binspersemitone = 3; // this must be 3 Chris@27: int minoctave = 0; // this must be 0 Chris@27: int maxoctave = 7; // this must be 7 Chris@27: int oversampling = 80; Chris@27: Chris@27: // linear frequency vector Chris@27: vector fft_f; Chris@27: for (int i = 0; i < blocksize/2; ++i) { Chris@27: fft_f.push_back(i * (fs * 1.0 / blocksize)); Chris@27: } Chris@27: float fft_width = fs * 2.0 / blocksize; Chris@27: Chris@27: // linear oversampled frequency vector Chris@27: vector oversampled_f; Chris@27: for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { Chris@27: oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); Chris@27: } Chris@27: Chris@27: // pitch-spaced frequency vector Chris@27: int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! Chris@27: int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! Chris@27: vector cq_f; Chris@27: float oob = 1.0/binspersemitone; // one over binspersemitone Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); Chris@27: for (int i = minMIDI + 1; i < maxMIDI; ++i) { Chris@27: for (int k = -1; k < 2; ++k) { Chris@27: cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); Chris@27: } Chris@27: } Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); Chris@27: Chris@27: int nFFT = fft_f.size(); Chris@27: Chris@27: vector fft_activation; Chris@27: for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { Chris@27: float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); Chris@27: fft_activation.push_back(cosp); Chris@27: // cerr << cosp << endl; Chris@27: } Chris@27: Chris@27: float cq_activation; Chris@27: for (int iFFT = 1; iFFT < nFFT; ++iFFT) { Chris@27: // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) Chris@27: int curr_start = oversampling * iFFT - oversampling; Chris@27: int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here Chris@27: // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; Chris@27: for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { Chris@27: outmatrix[iFFT + nFFT * iCQ] = 0; Chris@27: if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood Chris@27: for (int iOS = curr_start; iOS < curr_end; ++iOS) { Chris@27: cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); Chris@27: // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; Chris@27: outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; Chris@27: } Chris@27: // if (iCQ == 1 || iCQ == 2) { Chris@27: // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; Chris@27: // } Chris@27: } Chris@27: } Chris@27: } Chris@27: return true; Chris@27: } Chris@27: Chris@27: void dictionaryMatrix(float* dm) { Chris@27: int binspersemitone = 3; // this must be 3 Chris@27: int minoctave = 0; // this must be 0 Chris@27: int maxoctave = 7; // this must be 7 Chris@27: float s_param = 0.7; Chris@27: Chris@27: // pitch-spaced frequency vector Chris@27: int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! Chris@27: int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! Chris@27: vector cq_f; Chris@27: float oob = 1.0/binspersemitone; // one over binspersemitone Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); Chris@27: for (int i = minMIDI + 1; i < maxMIDI; ++i) { Chris@27: for (int k = -1; k < 2; ++k) { Chris@27: cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); Chris@27: } Chris@27: } Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); Chris@27: cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); Chris@27: Chris@27: float curr_f; Chris@27: float floatbin; Chris@27: float curr_amp; Chris@27: // now for every combination calculate the matrix element Chris@27: for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { Chris@27: // cerr << iOut << endl; Chris@27: for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { Chris@27: curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; Chris@27: // if (curr_f > cq_f[nNote-1]) break; Chris@27: floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); Chris@27: // cerr << floatbin << endl; Chris@27: curr_amp = pow(s_param,float(iHarm-1)); Chris@27: // cerr << "curramp" << curr_amp << endl; Chris@27: for (unsigned iNote = 0; iNote < nNote; ++iNote) { Chris@27: if (abs(iNote+1.0-floatbin)<2) { Chris@27: dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp; Chris@27: // dm[iNote + nNote * iOut] += 1 * curr_amp; Chris@27: } Chris@27: } Chris@27: } Chris@27: } Chris@27: Chris@27: Chris@27: } Chris@27: Chris@30: static Chris@30: std::vector Chris@30: getPluginPath() Chris@30: { Chris@30: //!!! This is duplicated from PluginHostAdapter::getPluginPath, Chris@30: //!!! which is not available to us in the plugin (only to the Chris@30: //!!! host) Chris@30: Chris@30: std::vector path; Chris@30: std::string envPath; Chris@30: Chris@30: char *cpath = getenv("VAMP_PATH"); Chris@30: if (cpath) envPath = cpath; Chris@30: Chris@30: #ifdef _WIN32 Chris@30: #define PATH_SEPARATOR ';' Chris@30: #define DEFAULT_VAMP_PATH "%ProgramFiles%\\Vamp Plugins" Chris@30: #else Chris@30: #define PATH_SEPARATOR ':' Chris@30: #ifdef __APPLE__ Chris@30: #define DEFAULT_VAMP_PATH "$HOME/Library/Audio/Plug-Ins/Vamp:/Library/Audio/Plug-Ins/Vamp" Chris@30: #else Chris@30: #define DEFAULT_VAMP_PATH "$HOME/vamp:$HOME/.vamp:/usr/local/lib/vamp:/usr/lib/vamp" Chris@30: #endif Chris@30: #endif Chris@30: Chris@30: if (envPath == "") { Chris@30: envPath = DEFAULT_VAMP_PATH; Chris@30: char *chome = getenv("HOME"); Chris@30: if (chome) { Chris@30: std::string home(chome); Chris@30: std::string::size_type f; Chris@30: while ((f = envPath.find("$HOME")) != std::string::npos && Chris@30: f < envPath.length()) { Chris@30: envPath.replace(f, 5, home); Chris@30: } Chris@30: } Chris@30: #ifdef _WIN32 Chris@30: char *cpfiles = getenv("ProgramFiles"); Chris@30: if (!cpfiles) cpfiles = (char *)"C:\\Program Files"; Chris@30: std::string pfiles(cpfiles); Chris@30: std::string::size_type f; Chris@30: while ((f = envPath.find("%ProgramFiles%")) != std::string::npos && Chris@30: f < envPath.length()) { Chris@30: envPath.replace(f, 14, pfiles); Chris@30: } Chris@30: #endif Chris@30: } Chris@30: Chris@30: std::string::size_type index = 0, newindex = 0; Chris@30: Chris@30: while ((newindex = envPath.find(PATH_SEPARATOR, index)) < envPath.size()) { Chris@30: path.push_back(envPath.substr(index, newindex - index)); Chris@30: index = newindex + 1; Chris@30: } Chris@30: Chris@30: path.push_back(envPath.substr(index)); Chris@30: Chris@30: return path; Chris@30: } Chris@27: Chris@27: vector chordDictionary(vector *mchorddict) { Chris@30: Chris@27: typedef tokenizer > Tok; Chris@27: char_separator sep(",; ","="); Chris@30: Chris@30: string chordDictBase("chord.dict"); Chris@30: string chordDictFilename; Chris@30: Chris@30: vector ppath = getPluginPath(); Chris@30: for (int i = 0; i < ppath.size(); ++i) { Chris@30: chordDictFilename = ppath[i] + "/" + chordDictBase; Chris@30: cerr << "Looking for chord.dict in " << chordDictFilename << "..." << endl; Chris@30: if (iostreams::stream(chordDictFilename.c_str()) Chris@30: .is_open()) { Chris@30: cerr << "(Success)" << endl; Chris@30: break; Chris@30: } Chris@30: } Chris@30: Chris@30: iostreams::stream chordDictFile(chordDictFilename); Chris@27: string line; Chris@27: int iElement = 0; Chris@27: int nChord = 0; Chris@27: Chris@27: vector loadedChordNames; Chris@27: vector loadedChordDict; Chris@27: if (chordDictFile.is_open()) { Chris@27: while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file Chris@27: // first, get the chord definition Chris@27: string chordType; Chris@27: vector tempPCVector; Chris@27: // cerr << line << endl; Chris@27: if (!line.empty() && line.substr(0,1) != "#") { Chris@27: Tok tok(line, sep); Chris@27: for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements Chris@27: string tempString = *tok_iter; Chris@27: // cerr << tempString << endl; Chris@27: if (tok_iter == tok.begin()) { // either the chord name or a colon Chris@27: if (tempString == "=") { Chris@27: chordType = ""; Chris@27: } else { Chris@27: chordType = tempString; Chris@27: tok_iter++; // is this cheating ? :) Chris@27: } Chris@27: } else { Chris@27: tempPCVector.push_back(lexical_cast(*tok_iter)); Chris@27: } Chris@27: } Chris@27: Chris@27: // now make all 12 chords of every type Chris@27: for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) { Chris@27: // add bass slash notation Chris@27: string slashNotation = ""; Chris@27: for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) { Chris@27: if (tempPCVector[(kSemitone) % 12] > 0.99) { Chris@27: slashNotation = bassnames[iSemitone][kSemitone]; Chris@27: } Chris@27: } Chris@27: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes Chris@27: // cerr << ((kSemitone - iSemitone + 12) % 12) << endl; Chris@27: float bassValue = 0; Chris@27: if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) { Chris@27: bassValue = 1; Chris@27: } else { Chris@27: if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5; Chris@27: } Chris@27: loadedChordDict.push_back(bassValue); Chris@27: } Chris@27: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes Chris@27: loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]); Chris@27: } Chris@27: ostringstream os; Chris@27: if (slashNotation.empty()) { Chris@27: os << notenames[12+iSemitone] << chordType; Chris@27: } else { Chris@27: os << notenames[12+iSemitone] << chordType << "/" << slashNotation; Chris@27: } Chris@27: // cerr << os.str() << endl; Chris@27: loadedChordNames.push_back(os.str()); Chris@27: } Chris@27: } Chris@27: } Chris@27: // N type Chris@27: loadedChordNames.push_back("N"); Chris@27: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5); Chris@27: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0); Chris@27: Chris@27: // normalise Chris@27: float sum = 0; Chris@27: for (int i = 0; i < loadedChordDict.size(); i++) { Chris@27: sum += pow(loadedChordDict[i],2); Chris@27: if (i % 24 == 23) { Chris@27: float invertedsum = 1.0/sqrt(sum); Chris@27: for (int k = 0; k < 24; k++) { Chris@27: loadedChordDict[i-k] *= invertedsum; Chris@27: } Chris@27: sum = 0; Chris@27: } Chris@27: Chris@27: } Chris@27: Chris@27: Chris@27: nChord = 0; Chris@27: for (int i = 0; i < loadedChordNames.size(); i++) { Chris@27: nChord++; Chris@27: } Chris@27: chordDictFile.close(); Chris@27: Chris@27: Chris@27: // mchorddict = new float[nChord*24]; Chris@27: for (int i = 0; i < nChord*24; i++) { Chris@27: mchorddict->push_back(loadedChordDict[i]); Chris@27: } Chris@27: Chris@27: } else {// use default from chorddict.cpp Chris@27: // mchorddict = new float[nChorddict]; Chris@27: for (int i = 0; i < nChorddict; i++) { Chris@27: mchorddict->push_back(chorddict[i]); Chris@27: } Chris@27: Chris@27: nChord = nChorddict/24; Chris@27: // mchordnames = new string[nChorddict/24]; Chris@27: char buffer1 [50]; Chris@27: for (int i = 0; i < nChorddict/24; i++) { Chris@27: if (i < nChorddict/24 - 1) { Chris@27: sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]); Chris@27: } else { Chris@27: sprintf(buffer1, "N"); Chris@27: } Chris@27: ostringstream os; Chris@27: os << buffer1; Chris@27: loadedChordNames.push_back(os.str()); Chris@27: Chris@27: } Chris@27: Chris@27: } Chris@27: // cerr << "before leaving" << chordnames[1] << endl; Chris@27: return loadedChordNames; Chris@27: }