matthiasm@0: matthiasm@0: #include "NNLSChroma.h" matthiasm@0: #include matthiasm@10: // #include matthiasm@0: #include matthiasm@0: #include matthiasm@3: #include matthiasm@0: #include matthiasm@0: #include matthiasm@7: #include matthiasm@0: #include matthiasm@7: #include matthiasm@7: #include matthiasm@7: #include matthiasm@7: #include matthiasm@1: #include "nnls.h" matthiasm@0: #include "chorddict.cpp" matthiasm@9: matthiasm@10: // #include matthiasm@10: // #define N 1000 matthiasm@10: // #define CHUNKSIZE 100 matthiasm@9: matthiasm@9: matthiasm@0: using namespace std; matthiasm@7: using namespace boost; matthiasm@0: matthiasm@0: const float sinvalue = 0.866025404; matthiasm@0: const float cosvalue = -0.5; matthiasm@0: const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; matthiasm@0: const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; matthiasm@0: const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 
0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; matthiasm@0: const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", matthiasm@0: "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; matthiasm@7: matthiasm@7: const char* bassnames[12][12] ={ matthiasm@7: {"A","","B","C","C#","D","","E","","F#","G","G#"}, matthiasm@7: {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"}, matthiasm@7: {"B","","C#","D","D#","E","","F#","","G#","A","A#"}, matthiasm@7: {"C","","D","Eb","E","F","","G","","A","Bb","B"}, matthiasm@7: {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"}, matthiasm@7: {"D","","E","F","F#","G","","A","","B","C","C#"}, matthiasm@7: {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"}, matthiasm@7: {"E","","F#","G","G#","A","","B","","C#","D","D#"}, matthiasm@7: {"F","","G","Ab","A","Bb","","C","","D","Eb","E"}, matthiasm@7: {"F#","","G#","A","A#","B","","C#","","D#","E","E#"}, matthiasm@7: {"G","","A","Bb","B","C","","D","","E","F","F#"}, matthiasm@7: {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"} matthiasm@7: }; matthiasm@0: const vector hw(hammingwind, hammingwind+19); matthiasm@0: const int nNote = 256; matthiasm@0: matthiasm@0: /** Special Convolution matthiasm@0: special convolution is as long as the 
// convolvee, i.e. the first argument. In the valid core part of the
// convolution it contains the usual convolution values, but the pads at the
// beginning (ending) have the same values as the first (last) valid
// convolution bin. **/

const bool debug_on = false;

/**
 * Convolve `convolvee` with an odd-length `kernel` and return a vector that is
 * exactly as long as `convolvee`.
 *
 * The centre part holds the ordinary ("valid") convolution values; the first
 * and last kernel.size()/2 entries are copies of the first and last valid
 * value respectively.
 *
 * The result used to be fixed at 256 entries regardless of the input length,
 * which wrote out of bounds for longer inputs. For 256-entry inputs the output
 * is unchanged.
 *
 * @param convolvee signal to smooth
 * @param kernel    smoothing kernel; its length must be odd (asserted)
 * @return smoothed signal; all zeros if the kernel is empty or longer than
 *         the signal (there is no valid convolution value to copy then)
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    assert(lenKernel % 2 != 0); // no exception handling !!!

    std::vector<float> Z(convolvee.size(), 0.0f);
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    const int half = lenKernel / 2;

    // valid core of the convolution, shifted so that it is centred
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < half; n++) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}

// vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
// {
//     vector<float> freq(binnumbers.size, 0.0);
//     for (unsigned i = 0; i < binnumbers.size; ++i) {
//         freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
//     }
//     return freq;
// }

/**
 * Raised-cosine pulse of total width `width` centred on `centre`.
 *
 * @return cos(2*pi*(x-centre)/width)/2 + 1/2 while |x-centre| <= width/2,
 *         otherwise 0. Also 0 when `width` is not positive.
 */
float cospuls(float x, float centre, float width)
{
    // Spelled out instead of M_PI, which is not part of standard C++.
    const double kPi = 3.14159265358979323846;

    if (!(width > 0)) {
        return 0.0f; // avoids the 0 * inf = NaN case for a zero-width pulse
    }
    const float recipwidth = 1.0 / width;
    const float offset = x - centre;
    // std::fabs: an unqualified abs() may resolve to the integer overload.
    if (std::fabs(offset) <= 0.5 * width) {
        return std::cos(offset * 2 * kPi * recipwidth) * .5 + .5;
    }
    return 0.0f;
}

/**
 * Raised-cosine pulse on a log-frequency axis.
 *
 * The distance between `x` and `centre` is measured in bins of
 * 1/binsperoctave octave; the pulse is two bins wide. The value is divided by
 * (ln(2)/binsperoctave * x) to correct for note density.
 *
 * @return 0 for x <= 0 (or NaN), where the logarithm is undefined.
 */
float pitchCospuls(float x, float centre, int binsperoctave)
{
    if (!(x > 0)) {
        return 0.0f;
    }
    const float warpedf = -binsperoctave * (std::log2(centre) - std::log2(x));
    // now scale to correct for note density
    const float c = std::log(2.0) / binsperoctave;
    return cospuls(warpedf, 0.0, 2.0) / (c * x);
}

/**
 * Fill `outmatrix` with the mapping from linear FFT bins to third-of-a-semitone
 * log-frequency bins.
 *
 * Layout: outmatrix[iFFT + (blocksize/2) * iCQ], with iFFT in [0, blocksize/2)
 * and iCQ in [0, 256). The caller must provide (blocksize/2) * 256 floats.
 * Every entry is written; the DC column (iFFT == 0) is set to 0.
 *
 * Each entry is the sum, over an 80-times oversampled frequency grid around
 * the FFT bin, of the product of the FFT bin's raised-cosine response and the
 * log-frequency bin's pitchCospuls response.
 *
 * @param fs        sample rate in Hz (must be positive)
 * @param blocksize FFT size (must be at least 4)
 * @param outmatrix destination buffer (must not be null)
 * @return true on success, false if an argument is unusable
 */
bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {

    const int binspersemitone = 3; // this must be 3
    const int minoctave = 0;       // this must be 0
    const int maxoctave = 7;       // this must be 7
    const int oversampling = 80;

    // fft_f[1] and the first 2 * oversampling oversampled frequencies are read
    // below, which needs at least two FFT bins.
    if (outmatrix == 0 || fs <= 0 || blocksize < 4) {
        return false;
    }

    const double binHz = fs * 1.0 / blocksize;
    const float fft_width = fs * 2.0 / blocksize;

    // linear frequency vector
    std::vector<float> fft_f;
    fft_f.reserve(blocksize / 2);
    for (int i = 0; i < blocksize / 2; ++i) {
        fft_f.push_back(i * binHz);
    }
    const int nFFT = static_cast<int>(fft_f.size());

    // linear oversampled frequency vector
    const int nOversampled = oversampling * blocksize / 2;
    std::vector<float> oversampled_f;
    oversampled_f.reserve(nOversampled);
    for (int i = 0; i < nOversampled; ++i) {
        oversampled_f.push_back(i * (binHz / oversampling));
    }

    // pitch-spaced frequency vector
    const int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
    const int maxMIDI = 21 + maxoctave * 12;     // this includes one additional semitone!
    const float oob = 1.0 / binspersemitone;     // one over binspersemitone
    std::vector<float> cq_f;
    cq_f.reserve(4 + 3 * (maxMIDI - minMIDI - 1));
    cq_f.push_back(440 * std::pow(2.0, 0.083333 * (minMIDI - 69))); // 0.083333 is approx 1/12
    cq_f.push_back(440 * std::pow(2.0, 0.083333 * (minMIDI + oob - 69)));
    for (int i = minMIDI + 1; i < maxMIDI; ++i) {
        for (int k = -1; k < 2; ++k) {
            cq_f.push_back(440 * std::pow(2.0, 0.083333333333 * (i + oob * k - 69)));
        }
    }
    // Second-to-last bin: one third of a semitone below maxMIDI. This used to
    // be computed from minMIDI, which put the bin near the bottom of the range.
    cq_f.push_back(440 * std::pow(2.0, 0.083333 * (maxMIDI - oob - 69)));
    cq_f.push_back(440 * std::pow(2.0, 0.083333 * (maxMIDI - 69)));
    const int nCQ = static_cast<int>(cq_f.size());

    // Response of one FFT bin on the oversampled grid. It has the same shape
    // for every bin, so it is sampled once around bin 1.
    std::vector<float> fft_activation;
    fft_activation.reserve(2 * oversampling);
    for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
        fft_activation.push_back(cospuls(oversampled_f[iOS], fft_f[1], fft_width));
    }

    // The main loop starts at bin 1, so give the DC column a defined value.
    for (int iCQ = 0; iCQ < nCQ; ++iCQ) {
        outmatrix[nFFT * iCQ] = 0;
    }

    const double upperRatio = std::pow(2.0, 0.084);
    const double lowerRatio = std::pow(2.0, -0.084 * 2);

    for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
        // find frequency stretch where the oversampled vector can be non-zero
        // (i.e. in a window of width fft_width around the current frequency)
        const int curr_start = oversampling * iFFT - oversampling;
        const int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
        for (int iCQ = 0; iCQ < nCQ; ++iCQ) {
            float &cell = outmatrix[iFFT + nFFT * iCQ];
            cell = 0;
            // only integrate within a generous neighbourhood of the note
            const bool nearby =
                cq_f[iCQ] * upperRatio + fft_width > fft_f[iFFT] &&
                cq_f[iCQ] * lowerRatio - fft_width < fft_f[iFFT];
            if (!nearby) {
                continue;
            }
            for (int iOS = curr_start; iOS < curr_end; ++iOS) {
                const float cq_activation =
                    pitchCospuls(oversampled_f[iOS], cq_f[iCQ], binspersemitone * 12);
                cell += cq_activation * fft_activation[iOS - curr_start];
            }
        }
    }
    return true;
}

/**
 * Accumulate the note dictionary into `dm`.
 *
 * Column iOut (0..83, one per semitone) receives the spectral profile of a
 * note with 20 harmonics whose amplitudes decay geometrically as 0.7^(h-1).
 * Each harmonic is spread over neighbouring bins with a raised cosine of
 * width 3 bins.
 *
 * Layout: dm[iNote + 256 * iOut] with iNote in [0, 256). Values are added with
 * +=, so the caller must zero the 256 * 84 floats beforehand.
 *
 * @return true on success, false if `dm` is null. (The function previously
 *         fell off its end without returning a value.)
 */
bool dictionaryMatrix(float* dm) {
    const int binspersemitone = 3; // this must be 3
    const int minoctave = 0;       // this must be 0
    const int maxoctave = 7;       // this must be 7
    const float s_param = 0.7;
    const unsigned binsPerColumn = 256; // column stride of dm; same value as the file-level nNote
    const unsigned nHarmonics = 20;
    const unsigned nOut = 12 * (maxoctave - minoctave);

    if (dm == 0) {
        return false;
    }

    // now for every combination calculate the matrix element
    for (unsigned iOut = 0; iOut < nOut; ++iOut) {
        for (unsigned iHarm = 1; iHarm <= nHarmonics; ++iHarm) {
            // 1-based fractional bin position of this harmonic
            const float floatbin = ((iOut + 1) * binspersemitone + 1)
                + binspersemitone * 12 * std::log2(static_cast<double>(iHarm));
            const float curr_amp = std::pow(s_param, float(iHarm - 1));
            for (unsigned iNote = 0; iNote < binsPerColumn; ++iNote) {
                if (std::fabs(iNote + 1.0 - floatbin) < 2) {
                    dm[iNote + binsPerColumn * iOut] +=
                        cospuls(iNote + 1.0, floatbin, binspersemitone + 0.0) * curr_amp;
                }
            }
        }
    }
    return true;
}

/**
 * Read an environment variable.
 *
 * @param key name of the variable
 * @return its value, or an empty string if the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * val = std::getenv( key.c_str() );
    std::string retval;
    if (val != NULL) {
        retval = val;
    }
    return retval;
}
matthiasm@7: matthiasm@7: matthiasm@9: vector chordDictionary(vector *mchorddict) { matthiasm@7: // ifstream chordDictFile; matthiasm@7: string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict"); matthiasm@7: // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n"; matthiasm@7: typedef tokenizer > Tok; matthiasm@7: // char_separator sep; // default constructed matthiasm@7: char_separator sep(",; ",":"); matthiasm@7: iostreams::stream chordDictFile(chordDictFilename.c_str()); matthiasm@7: string line; matthiasm@7: int iElement = 0; matthiasm@7: int nChord = 0; matthiasm@7: matthiasm@7: vector loadedChordNames; matthiasm@7: vector loadedChordDict; matthiasm@7: if (chordDictFile.is_open()) { matthiasm@7: while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file matthiasm@7: // first, get the chord definition matthiasm@7: string chordType; matthiasm@7: vector tempPCVector; matthiasm@7: // cerr << line << endl; matthiasm@7: if (!line.empty() && line.substr(0,1) != "#") { matthiasm@7: Tok tok(line, sep); matthiasm@7: for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements matthiasm@7: string tempString = *tok_iter; matthiasm@7: // cerr << tempString << endl; matthiasm@7: if (tok_iter == tok.begin()) { // either the chord name or a colon matthiasm@7: if (tempString == ":") { matthiasm@7: chordType = ""; matthiasm@7: } else { matthiasm@7: chordType = tempString; matthiasm@7: tok_iter++; // is this cheating ? 
:) matthiasm@7: } matthiasm@7: } else { matthiasm@7: tempPCVector.push_back(lexical_cast(*tok_iter)); matthiasm@7: } matthiasm@7: } matthiasm@7: matthiasm@7: // now make all 12 chords of every type matthiasm@7: for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) { matthiasm@7: // add bass slash notation matthiasm@7: string slashNotation = ""; matthiasm@7: for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) { matthiasm@7: if (tempPCVector[(kSemitone) % 12] > 0.99) { matthiasm@7: slashNotation = bassnames[iSemitone][kSemitone]; matthiasm@7: } matthiasm@7: } matthiasm@7: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes matthiasm@9: // cerr << ((kSemitone - iSemitone + 12) % 12) << endl; matthiasm@9: float bassValue = 0; matthiasm@9: if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) { matthiasm@9: bassValue = 1; matthiasm@9: } else { matthiasm@10: if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5; matthiasm@9: } matthiasm@9: loadedChordDict.push_back(bassValue); matthiasm@7: } matthiasm@7: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes matthiasm@7: loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]); matthiasm@7: } matthiasm@7: ostringstream os; matthiasm@7: if (slashNotation.empty()) { matthiasm@7: os << notenames[12+iSemitone] << chordType; matthiasm@7: } else { matthiasm@7: os << notenames[12+iSemitone] << chordType << "/" << slashNotation; matthiasm@7: } matthiasm@7: matthiasm@7: loadedChordNames.push_back(os.str()); matthiasm@7: } matthiasm@7: } matthiasm@7: } matthiasm@7: // N type matthiasm@7: loadedChordNames.push_back("N"); matthiasm@7: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5); matthiasm@7: for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0); matthiasm@7: matthiasm@7: // normalise matthiasm@7: float sum = 0; matthiasm@7: for (int i 
= 0; i < loadedChordDict.size(); i++) { matthiasm@7: sum += pow(loadedChordDict[i],2); matthiasm@7: if (i % 24 == 23) { matthiasm@7: float invertedsum = 1.0/sqrt(sum); matthiasm@7: for (int k = 0; k < 24; k++) { matthiasm@7: loadedChordDict[i-k] *= invertedsum; matthiasm@7: } matthiasm@7: sum = 0; matthiasm@7: } matthiasm@7: matthiasm@7: } matthiasm@7: matthiasm@7: matthiasm@7: nChord = 0; matthiasm@7: for (int i = 0; i < loadedChordNames.size(); i++) { matthiasm@7: nChord++; matthiasm@7: } matthiasm@7: chordDictFile.close(); matthiasm@7: matthiasm@7: matthiasm@9: // mchorddict = new float[nChord*24]; matthiasm@7: for (int i = 0; i < nChord*24; i++) { matthiasm@9: mchorddict->push_back(loadedChordDict[i]); matthiasm@7: } matthiasm@9: matthiasm@7: } else {// use default from chorddict.cpp matthiasm@9: // mchorddict = new float[nChorddict]; matthiasm@7: for (int i = 0; i < nChorddict; i++) { matthiasm@9: mchorddict->push_back(chorddict[i]); matthiasm@7: } matthiasm@7: matthiasm@7: nChord = nChorddict/24; matthiasm@7: // mchordnames = new string[nChorddict/24]; matthiasm@7: char buffer1 [50]; matthiasm@7: for (int i = 0; i < nChorddict/24; i++) { matthiasm@7: if (i < nChorddict/24 - 1) { matthiasm@7: sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]); matthiasm@7: } else { matthiasm@7: sprintf(buffer1, "N"); matthiasm@7: } matthiasm@7: ostringstream os; matthiasm@7: os << buffer1; matthiasm@9: loadedChordNames.push_back(os.str()); matthiasm@9: matthiasm@7: } matthiasm@7: matthiasm@7: } matthiasm@9: // cerr << "before leaving" << chordnames[1] << endl; matthiasm@9: return loadedChordNames; matthiasm@7: } matthiasm@0: matthiasm@0: NNLSChroma::NNLSChroma(float inputSampleRate) : matthiasm@0: Plugin(inputSampleRate), matthiasm@0: m_fl(0), matthiasm@0: m_blockSize(0), matthiasm@0: m_stepSize(0), matthiasm@0: m_lengthOfNoteIndex(0), matthiasm@0: m_meanTuning0(0), matthiasm@0: m_meanTuning1(0), matthiasm@0: m_meanTuning2(0), matthiasm@0: m_localTuning0(0), 
matthiasm@0: m_localTuning1(0), matthiasm@0: m_localTuning2(0), matthiasm@4: m_paling(1.0), matthiasm@3: m_preset(0.0), matthiasm@0: m_localTuning(0), matthiasm@0: m_kernelValue(0), matthiasm@0: m_kernelFftIndex(0), matthiasm@0: m_kernelNoteIndex(0), matthiasm@1: m_dict(0), matthiasm@0: m_tuneLocal(false), matthiasm@7: m_dictID(0), matthiasm@7: m_chorddict(0), matthiasm@12: m_chordnames(0), matthiasm@12: m_doNormalizeChroma(0) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> NNLSChroma" << endl; matthiasm@7: matthiasm@7: // make the *note* dictionary matrix matthiasm@3: m_dict = new float[nNote * 84]; matthiasm@3: for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; matthiasm@1: dictionaryMatrix(m_dict); matthiasm@7: matthiasm@7: // get the *chord* dictionary from file (if the file exists) matthiasm@9: m_chordnames = chordDictionary(&m_chorddict); matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: NNLSChroma::~NNLSChroma() matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> ~NNLSChroma" << endl; matthiasm@1: delete [] m_dict; matthiasm@9: // delete [] m_chorddict; matthiasm@7: // delete m_chordnames; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getIdentifier() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getIdentifier" << endl; matthiasm@0: return "nnls_chroma"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getName() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getName" << endl; matthiasm@0: return "NNLS Chroma"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getDescription() const matthiasm@0: { matthiasm@0: // Return something helpful here! 
matthiasm@0: if (debug_on) cerr << "--> getDescription" << endl; matthiasm@4: return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription."; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getMaker() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMaker" << endl; matthiasm@0: // Your name here matthiasm@0: return "Matthias Mauch"; matthiasm@0: } matthiasm@0: matthiasm@0: int matthiasm@0: NNLSChroma::getPluginVersion() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPluginVersion" << endl; matthiasm@0: // Increment this each time you release a version that behaves matthiasm@0: // differently from the previous one matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getCopyright() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getCopyright" << endl; matthiasm@0: // This function is not ideally named. It does not necessarily matthiasm@0: // need to say who made the plugin -- getMaker does that -- but it matthiasm@0: // should indicate the terms under which it is distributed. For matthiasm@0: // example, "Copyright (year). All Rights Reserved", or "GPL" matthiasm@0: return "Copyright (2010). 
All rights reserved."; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::InputDomain matthiasm@0: NNLSChroma::getInputDomain() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getInputDomain" << endl; matthiasm@0: return FrequencyDomain; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getPreferredBlockSize() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPreferredBlockSize" << endl; matthiasm@0: return 16384; // 0 means "I can handle any block size" matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getPreferredStepSize() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPreferredStepSize" << endl; matthiasm@0: return 2048; // 0 means "anything sensible"; in practice this matthiasm@0: // means the same as the block size for TimeDomain matthiasm@0: // plugins, or half of it for FrequencyDomain plugins matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getMinChannelCount() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMinChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getMaxChannelCount() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMaxChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::ParameterList matthiasm@0: NNLSChroma::getParameterDescriptors() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getParameterDescriptors" << endl; matthiasm@0: ParameterList list; matthiasm@0: matthiasm@3: ParameterDescriptor d3; matthiasm@3: d3.identifier = "preset"; matthiasm@3: d3.name = "preset"; matthiasm@3: d3.description = "Spectral paling: no paling - 0; whitening - 1."; matthiasm@3: d3.unit = ""; matthiasm@3: d3.isQuantized = true; matthiasm@3: d3.quantizeStep = 1; matthiasm@3: d3.minValue = 0.0; matthiasm@4: d3.maxValue = 3.0; matthiasm@3: d3.defaultValue = 0.0; matthiasm@3: 
d3.valueNames.push_back("polyphonic pop"); matthiasm@3: d3.valueNames.push_back("polyphonic pop (fast)"); matthiasm@3: d3.valueNames.push_back("solo keyboard"); matthiasm@3: d3.valueNames.push_back("manual"); matthiasm@3: list.push_back(d3); matthiasm@4: matthiasm@4: // ParameterDescriptor d0; matthiasm@4: // d0.identifier = "notedict"; matthiasm@4: // d0.name = "note dictionary"; matthiasm@4: // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; matthiasm@4: // d0.unit = ""; matthiasm@4: // d0.minValue = 0; matthiasm@4: // d0.maxValue = 1; matthiasm@4: // d0.defaultValue = 0; matthiasm@4: // d0.isQuantized = true; matthiasm@4: // d0.valueNames.push_back("s = 0.6"); matthiasm@4: // d0.valueNames.push_back("no NNLS"); matthiasm@4: // d0.quantizeStep = 1.0; matthiasm@4: // list.push_back(d0); matthiasm@4: matthiasm@4: ParameterDescriptor d1; matthiasm@4: d1.identifier = "tuningmode"; matthiasm@4: d1.name = "tuning mode"; matthiasm@4: d1.description = "Tuning can be performed locally or on the whole extraction segment. 
Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; matthiasm@4: d1.unit = ""; matthiasm@4: d1.minValue = 0; matthiasm@4: d1.maxValue = 1; matthiasm@4: d1.defaultValue = 0; matthiasm@4: d1.isQuantized = true; matthiasm@4: d1.valueNames.push_back("global tuning"); matthiasm@4: d1.valueNames.push_back("local tuning"); matthiasm@4: d1.quantizeStep = 1.0; matthiasm@4: list.push_back(d1); matthiasm@4: matthiasm@4: // ParameterDescriptor d2; matthiasm@4: // d2.identifier = "paling"; matthiasm@4: // d2.name = "spectral paling"; matthiasm@4: // d2.description = "Spectral paling: no paling - 0; whitening - 1."; matthiasm@4: // d2.unit = ""; matthiasm@4: // d2.isQuantized = true; matthiasm@4: // // d2.quantizeStep = 0.1; matthiasm@4: // d2.minValue = 0.0; matthiasm@4: // d2.maxValue = 1.0; matthiasm@4: // d2.defaultValue = 1.0; matthiasm@4: // d2.isQuantized = false; matthiasm@4: // list.push_back(d2); matthiasm@12: ParameterDescriptor d4; matthiasm@12: d4.identifier = "chromanormalize"; matthiasm@12: d4.name = "chroma normalization"; matthiasm@12: d4.description = "How shall the chroma vector be normalized?"; matthiasm@12: d4.unit = ""; matthiasm@12: d4.minValue = 0; matthiasm@12: d4.maxValue = 1; matthiasm@12: d4.defaultValue = 0; matthiasm@12: d4.isQuantized = true; matthiasm@12: d4.valueNames.push_back("no normalization"); matthiasm@12: d4.valueNames.push_back("maximum normalization"); matthiasm@12: d4.quantizeStep = 1.0; matthiasm@12: list.push_back(d4); matthiasm@4: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: float matthiasm@0: NNLSChroma::getParameter(string identifier) const matthiasm@0: { matthiasm@3: if (debug_on) cerr << "--> getParameter" << endl; matthiasm@0: if (identifier == "notedict") { matthiasm@0: return m_dictID; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "paling") { matthiasm@0: return m_paling; matthiasm@0: } matthiasm@0: 
matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: if (m_tuneLocal) { matthiasm@0: return 1.0; matthiasm@0: } else { matthiasm@0: return 0.0; matthiasm@0: } matthiasm@0: } matthiasm@3: if (identifier == "preset") { matthiasm@3: return m_preset; matthiasm@3: } matthiasm@12: if (identifier == "chromanormalize") { matthiasm@12: return m_doNormalizeChroma; matthiasm@12: } matthiasm@0: return 0; matthiasm@0: matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::setParameter(string identifier, float value) matthiasm@0: { matthiasm@3: if (debug_on) cerr << "--> setParameter" << endl; matthiasm@0: if (identifier == "notedict") { matthiasm@0: m_dictID = (int) value; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "paling") { matthiasm@0: m_paling = value; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: m_tuneLocal = (value > 0) ? true : false; matthiasm@0: // cerr << "m_tuneLocal :" << m_tuneLocal << endl; matthiasm@0: } matthiasm@3: if (identifier == "preset") { matthiasm@3: m_preset = value; matthiasm@3: if (m_preset == 0.0) { matthiasm@3: m_tuneLocal = false; matthiasm@3: m_paling = 1.0; matthiasm@3: m_dictID = 0.0; matthiasm@3: } matthiasm@3: if (m_preset == 1.0) { matthiasm@3: m_tuneLocal = false; matthiasm@3: m_paling = 1.0; matthiasm@3: m_dictID = 1.0; matthiasm@3: } matthiasm@3: if (m_preset == 2.0) { matthiasm@3: m_tuneLocal = false; matthiasm@3: m_paling = 0.7; matthiasm@3: m_dictID = 0.0; matthiasm@3: } matthiasm@3: } matthiasm@12: if (identifier == "chromanormalize") { matthiasm@12: m_doNormalizeChroma = value; matthiasm@12: } matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::ProgramList matthiasm@0: NNLSChroma::getPrograms() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPrograms" << endl; matthiasm@0: ProgramList list; matthiasm@0: matthiasm@0: // If you have no programs, return an empty list (or simply don't matthiasm@0: // implement this function or 
getCurrentProgram/selectProgram) matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getCurrentProgram() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getCurrentProgram" << endl; matthiasm@0: return ""; // no programs matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::selectProgram(string name) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> selectProgram" << endl; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: NNLSChroma::OutputList matthiasm@0: NNLSChroma::getOutputDescriptors() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getOutputDescriptors" << endl; matthiasm@0: OutputList list; matthiasm@0: matthiasm@0: // Make chroma names for the binNames property matthiasm@0: vector chromanames; matthiasm@0: vector bothchromanames; matthiasm@0: for (int iNote = 0; iNote < 24; iNote++) { matthiasm@0: bothchromanames.push_back(notenames[iNote]); matthiasm@0: if (iNote < 12) { matthiasm@0: chromanames.push_back(notenames[iNote]); matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@1: // int nNote = 84; matthiasm@0: matthiasm@0: // See OutputDescriptor documentation for the possibilities here. matthiasm@0: // Every plugin must have at least one output. 
matthiasm@0: matthiasm@0: OutputDescriptor d0; matthiasm@0: d0.identifier = "tuning"; matthiasm@0: d0.name = "Tuning"; matthiasm@0: d0.description = "The concert pitch."; matthiasm@0: d0.unit = "Hz"; matthiasm@0: d0.hasFixedBinCount = true; matthiasm@0: d0.binCount = 0; matthiasm@0: d0.hasKnownExtents = true; matthiasm@0: d0.minValue = 427.47; matthiasm@0: d0.maxValue = 452.89; matthiasm@0: d0.isQuantized = false; matthiasm@0: d0.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@0: d0.hasDuration = false; matthiasm@0: list.push_back(d0); matthiasm@0: matthiasm@0: OutputDescriptor d1; matthiasm@0: d1.identifier = "logfreqspec"; matthiasm@0: d1.name = "Log-Frequency Spectrum"; matthiasm@0: d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; matthiasm@0: d1.unit = ""; matthiasm@0: d1.hasFixedBinCount = true; matthiasm@0: d1.binCount = nNote; matthiasm@0: d1.hasKnownExtents = false; matthiasm@0: d1.isQuantized = false; matthiasm@0: d1.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d1.hasDuration = false; matthiasm@0: d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d1); matthiasm@0: matthiasm@0: OutputDescriptor d2; matthiasm@0: d2.identifier = "tunedlogfreqspec"; matthiasm@0: d2.name = "Tuned Log-Frequency Spectrum"; matthiasm@0: d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; matthiasm@0: d2.unit = ""; matthiasm@0: d2.hasFixedBinCount = true; matthiasm@0: d2.binCount = 256; matthiasm@0: d2.hasKnownExtents = false; matthiasm@0: d2.isQuantized = false; matthiasm@0: d2.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d2.hasDuration = false; matthiasm@0: d2.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d2); matthiasm@0: matthiasm@0: OutputDescriptor d3; matthiasm@0: d3.identifier = "semitonespectrum"; matthiasm@0: d3.name = "Semitone Spectrum"; matthiasm@0: d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum."; matthiasm@0: d3.unit = ""; matthiasm@0: d3.hasFixedBinCount = true; matthiasm@0: d3.binCount = 84; matthiasm@0: d3.hasKnownExtents = false; matthiasm@0: d3.isQuantized = false; matthiasm@0: d3.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d3.hasDuration = false; matthiasm@0: d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d3); matthiasm@0: matthiasm@0: OutputDescriptor d4; matthiasm@0: d4.identifier = "chroma"; matthiasm@0: d4.name = "Chromagram"; matthiasm@0: d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range."; matthiasm@0: d4.unit = ""; matthiasm@0: d4.hasFixedBinCount = true; matthiasm@0: d4.binCount = 12; matthiasm@0: d4.binNames = chromanames; matthiasm@0: d4.hasKnownExtents = false; matthiasm@0: d4.isQuantized = false; matthiasm@0: d4.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d4.hasDuration = false; matthiasm@0: d4.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d4); matthiasm@0: matthiasm@0: OutputDescriptor d5; matthiasm@0: d5.identifier = "basschroma"; matthiasm@0: d5.name = "Bass Chromagram"; matthiasm@0: d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range."; matthiasm@0: d5.unit = ""; matthiasm@0: d5.hasFixedBinCount = true; matthiasm@0: d5.binCount = 12; matthiasm@0: d5.binNames = chromanames; matthiasm@0: d5.hasKnownExtents = false; matthiasm@0: d5.isQuantized = false; matthiasm@0: d5.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d5.hasDuration = false; matthiasm@0: d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d5); matthiasm@0: matthiasm@0: OutputDescriptor d6; matthiasm@0: d6.identifier = "bothchroma"; matthiasm@0: d6.name = "Chromagram and Bass Chromagram"; matthiasm@0: d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription."; matthiasm@0: d6.unit = ""; matthiasm@0: d6.hasFixedBinCount = true; matthiasm@0: d6.binCount = 24; matthiasm@0: d6.binNames = bothchromanames; matthiasm@0: d6.hasKnownExtents = false; matthiasm@0: d6.isQuantized = false; matthiasm@0: d6.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d6.hasDuration = false; matthiasm@0: d6.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d6); matthiasm@0: matthiasm@0: OutputDescriptor d7; matthiasm@0: d7.identifier = "simplechord"; matthiasm@0: d7.name = "Simple Chord Estimate"; matthiasm@0: d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; matthiasm@0: d7.unit = ""; matthiasm@0: d7.hasFixedBinCount = true; matthiasm@0: d7.binCount = 0; matthiasm@0: d7.hasKnownExtents = false; matthiasm@0: d7.isQuantized = false; matthiasm@0: d7.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@0: d7.hasDuration = false; matthiasm@0: d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d7); matthiasm@0: matthiasm@1: // OutputDescriptor d8; matthiasm@1: // d8.identifier = "inconsistency"; matthiasm@1: // d8.name = "Harmonic inconsistency value"; matthiasm@1: // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high."; matthiasm@1: // d8.unit = ""; matthiasm@1: // d8.hasFixedBinCount = true; matthiasm@1: // d8.binCount = 1; matthiasm@1: // d8.hasKnownExtents = false; matthiasm@1: // d8.isQuantized = false; matthiasm@1: // d8.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@1: // d8.hasDuration = false; matthiasm@1: // d8.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: // list.push_back(d8); matthiasm@1: // matthiasm@1: // OutputDescriptor d9; matthiasm@1: // d9.identifier = "inconsistencysegment"; matthiasm@1: // d9.name = "Harmonic inconsistency segmenter"; matthiasm@1: // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; matthiasm@1: // d9.unit = ""; matthiasm@1: // d9.hasFixedBinCount = true; matthiasm@1: // d9.binCount = 0; matthiasm@1: // d9.hasKnownExtents = true; matthiasm@1: // d9.minValue = 0.1; matthiasm@1: // d9.maxValue = 0.9; matthiasm@1: // d9.isQuantized = false; matthiasm@1: // d9.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@1: // d9.hasDuration = false; matthiasm@1: // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: // list.push_back(d9); matthiasm@1: // matthiasm@1: OutputDescriptor d10; matthiasm@1: d10.identifier = "localtuning"; matthiasm@1: d10.name = "Local tuning"; matthiasm@4: d10.description = "Tuning based on the history up to this timestamp."; matthiasm@1: d10.unit = "Hz"; matthiasm@1: d10.hasFixedBinCount = true; matthiasm@1: d10.binCount = 1; matthiasm@1: d10.hasKnownExtents = true; matthiasm@1: d10.minValue = 427.47; matthiasm@1: d10.maxValue = 452.89; matthiasm@1: d10.isQuantized = false; matthiasm@3: d10.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@1: d10.hasDuration = false; matthiasm@3: // d10.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: list.push_back(d10); matthiasm@1: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: bool matthiasm@0: NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { matthiasm@1: if (debug_on) { matthiasm@1: cerr << "--> initialise"; matthiasm@1: } matthiasm@1: matthiasm@0: if (channels < getMinChannelCount() || matthiasm@0: channels > getMaxChannelCount()) return false; matthiasm@0: m_blockSize = blockSize; matthiasm@0: m_stepSize = stepSize; matthiasm@0: frameCount = 0; matthiasm@0: int tempn = 256 * m_blockSize/2; matthiasm@4: // cerr << "length of tempkernel : " << tempn << endl; matthiasm@1: float *tempkernel; matthiasm@1: matthiasm@1: tempkernel = new float[tempn]; matthiasm@1: matthiasm@0: logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); matthiasm@1: m_kernelValue.clear(); matthiasm@1: m_kernelFftIndex.clear(); matthiasm@1: m_kernelNoteIndex.clear(); matthiasm@1: int countNonzero = 0; matthiasm@0: for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix matthiasm@1: for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { matthiasm@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { matthiasm@1: m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); matthiasm@0: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { matthiasm@1: countNonzero++; matthiasm@0: } matthiasm@1: m_kernelFftIndex.push_back(iFFT); matthiasm@1: m_kernelNoteIndex.push_back(iNote); matthiasm@0: } matthiasm@0: } matthiasm@1: } matthiasm@4: // cerr << "nonzero count : " << countNonzero << endl; matthiasm@1: delete [] tempkernel; matthiasm@3: ofstream myfile; matthiasm@3: myfile.open ("matrix.txt"); matthiasm@3: // myfile << "Writing this to a file.\n"; matthiasm@3: for (int i = 0; i < nNote * 84; ++i) { matthiasm@3: myfile << m_dict[i] << endl; matthiasm@3: } matthiasm@3: myfile.close(); 
matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::reset() matthiasm@0: { matthiasm@4: if (debug_on) cerr << "--> reset"; matthiasm@4: matthiasm@0: // Clear buffers, reset stored values, etc matthiasm@4: frameCount = 0; matthiasm@4: m_dictID = 0; matthiasm@4: m_fl.clear(); matthiasm@4: m_meanTuning0 = 0; matthiasm@4: m_meanTuning1 = 0; matthiasm@4: m_meanTuning2 = 0; matthiasm@4: m_localTuning0 = 0; matthiasm@4: m_localTuning1 = 0; matthiasm@4: m_localTuning2 = 0; matthiasm@4: m_localTuning.clear(); matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::FeatureSet matthiasm@0: NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) matthiasm@0: { matthiasm@4: if (debug_on) cerr << "--> process" << endl; matthiasm@0: frameCount++; matthiasm@0: float *magnitude = new float[m_blockSize/2]; matthiasm@0: matthiasm@0: Feature f10; // local tuning matthiasm@3: f10.hasTimestamp = true; matthiasm@4: f10.timestamp = timestamp; matthiasm@0: const float *fbuf = inputBuffers[0]; matthiasm@0: matthiasm@0: // make magnitude matthiasm@0: for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { matthiasm@0: magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + matthiasm@0: fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); matthiasm@0: } matthiasm@4: matthiasm@0: // note magnitude mapping using pre-calculated matrix matthiasm@0: float *nm = new float[nNote]; // note magnitude matthiasm@0: for (size_t iNote = 0; iNote < nNote; iNote++) { matthiasm@0: nm[iNote] = 0; // initialise as 0 matthiasm@0: } matthiasm@0: int binCount = 0; matthiasm@0: for (vector::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { matthiasm@0: // cerr << "."; matthiasm@1: nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; matthiasm@1: // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; matthiasm@0: 
binCount++; matthiasm@0: } matthiasm@1: // cerr << nm[20]; matthiasm@1: // cerr << endl; matthiasm@0: matthiasm@0: matthiasm@0: float one_over_N = 1.0/frameCount; matthiasm@0: // update means of complex tuning variables matthiasm@0: m_meanTuning0 *= float(frameCount-1)*one_over_N; matthiasm@0: m_meanTuning1 *= float(frameCount-1)*one_over_N; matthiasm@0: m_meanTuning2 *= float(frameCount-1)*one_over_N; matthiasm@0: matthiasm@0: for (int iTone = 0; iTone < 160; iTone = iTone + 3) { matthiasm@0: m_meanTuning0 += nm[iTone + 0]*one_over_N; matthiasm@0: m_meanTuning1 += nm[iTone + 1]*one_over_N; matthiasm@0: m_meanTuning2 += nm[iTone + 2]*one_over_N; matthiasm@3: float ratioOld = 0.997; matthiasm@3: m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); matthiasm@3: m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); matthiasm@3: m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); matthiasm@0: } matthiasm@0: matthiasm@0: // if (m_tuneLocal) { matthiasm@0: // local tuning matthiasm@0: float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; matthiasm@0: float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; matthiasm@0: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); matthiasm@0: m_localTuning.push_back(normalisedtuning); matthiasm@0: float tuning440 = 440 * pow(2,normalisedtuning/12); matthiasm@0: f10.values.push_back(tuning440); matthiasm@3: // cerr << tuning440 << endl; matthiasm@0: // } matthiasm@0: matthiasm@0: Feature f1; // logfreqspec matthiasm@0: f1.hasTimestamp = true; matthiasm@0: f1.timestamp = timestamp; matthiasm@0: for (size_t iNote = 0; iNote < nNote; iNote++) { matthiasm@0: f1.values.push_back(nm[iNote]); matthiasm@0: } matthiasm@0: matthiasm@0: FeatureSet fs; matthiasm@0: fs[1].push_back(f1); matthiasm@3: fs[8].push_back(f10); matthiasm@0: matthiasm@0: // deletes matthiasm@0: delete[] 
magnitude; matthiasm@0: delete[] nm; matthiasm@0: matthiasm@0: m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures matthiasm@7: char * pPath; matthiasm@7: pPath = getenv ("VAMP_PATH"); matthiasm@7: matthiasm@7: matthiasm@0: return fs; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::FeatureSet matthiasm@0: NNLSChroma::getRemainingFeatures() matthiasm@0: { matthiasm@4: if (debug_on) cerr << "--> getRemainingFeatures" << endl; matthiasm@4: FeatureSet fsOut; matthiasm@4: if (m_fl.size() == 0) return fsOut; matthiasm@9: int nChord = m_chordnames.size(); matthiasm@0: // matthiasm@1: /** Calculate Tuning matthiasm@1: calculate tuning from (using the angle of the complex number defined by the matthiasm@1: cumulative mean real and imag values) matthiasm@1: **/ matthiasm@1: float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; matthiasm@1: float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; matthiasm@1: float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); matthiasm@1: float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); matthiasm@1: int intShift = floor(normalisedtuning * 3); matthiasm@1: float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this matthiasm@1: matthiasm@1: char buffer0 [50]; matthiasm@1: matthiasm@1: sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); matthiasm@1: matthiasm@1: // cerr << "normalisedtuning: " << normalisedtuning << '\n'; matthiasm@1: matthiasm@1: // push tuning to FeatureSet fsOut matthiasm@1: Feature f0; // tuning matthiasm@1: f0.hasTimestamp = true; matthiasm@1: f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; matthiasm@1: f0.label = buffer0; matthiasm@1: fsOut[0].push_back(f0); matthiasm@1: matthiasm@1: /** Tune Log-Frequency Spectrogram matthiasm@1: calculate a tuned log-frequency spectrogram (f2): use the tuning estimated 
above (kinda f0) to matthiasm@1: perform linear interpolation on the existing log-frequency spectrogram (kinda f1). matthiasm@1: **/ matthiasm@1: matthiasm@1: float tempValue = 0; matthiasm@1: float dbThreshold = 0; // relative to the background spectrum matthiasm@1: float thresh = pow(10,dbThreshold/20); matthiasm@1: // cerr << "tune local ? " << m_tuneLocal << endl; matthiasm@1: int count = 0; matthiasm@1: matthiasm@1: for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { matthiasm@1: Feature f1 = *i; matthiasm@1: Feature f2; // tuned log-frequency spectrum matthiasm@1: f2.hasTimestamp = true; matthiasm@1: f2.timestamp = f1.timestamp; matthiasm@1: f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero matthiasm@1: matthiasm@1: if (m_tuneLocal) { matthiasm@1: intShift = floor(m_localTuning[count] * 3); matthiasm@1: intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this matthiasm@1: } matthiasm@1: matthiasm@1: // cerr << intShift << " " << intFactor << endl; matthiasm@1: matthiasm@4: for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins matthiasm@1: tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; matthiasm@1: f2.values.push_back(tempValue); matthiasm@1: } matthiasm@1: matthiasm@1: f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge matthiasm@1: vector runningmean = SpecialConvolution(f2.values,hw); matthiasm@1: vector runningstd; matthiasm@1: for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) matthiasm@1: runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); matthiasm@1: } matthiasm@1: runningstd = SpecialConvolution(runningstd,hw); // second step convolve matthiasm@1: for (int i = 0; i < 256; i++) { matthiasm@1: runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std matthiasm@1: if 
(runningstd[i] > 0) { matthiasm@1: // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? matthiasm@1: // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; matthiasm@1: f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? matthiasm@1: (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; matthiasm@1: } matthiasm@1: if (f2.values[i] < 0) { matthiasm@1: cerr << "ERROR: negative value in logfreq spectrum" << endl; matthiasm@1: } matthiasm@1: } matthiasm@1: fsOut[2].push_back(f2); matthiasm@1: count++; matthiasm@1: } matthiasm@1: matthiasm@1: /** Semitone spectrum and chromagrams matthiasm@1: Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum matthiasm@1: is inferred using a non-negative least squares algorithm. matthiasm@1: Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means matthiasm@1: bass and treble stacked onto each other). matthiasm@1: **/ matthiasm@1: // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n); matthiasm@1: matthiasm@1: vector > chordogram; matthiasm@3: vector > scoreChordogram; matthiasm@1: vector oldchroma = vector(12,0); matthiasm@1: vector oldbasschroma = vector(12,0); matthiasm@1: count = 0; matthiasm@9: matthiasm@1: for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { matthiasm@1: Feature f2 = *it; // logfreq spectrum matthiasm@1: Feature f3; // semitone spectrum matthiasm@1: Feature f4; // treble chromagram matthiasm@1: Feature f5; // bass chromagram matthiasm@1: Feature f6; // treble and bass chromagram matthiasm@1: matthiasm@1: f3.hasTimestamp = true; matthiasm@1: f3.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f4.hasTimestamp = true; matthiasm@1: f4.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f5.hasTimestamp = true; matthiasm@1: f5.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f6.hasTimestamp = true; matthiasm@1: 
f6.timestamp = f2.timestamp; matthiasm@1: matthiasm@3: float b[256]; matthiasm@1: matthiasm@1: bool some_b_greater_zero = false; matthiasm@3: float sumb = 0; matthiasm@1: for (int i = 0; i < 256; i++) { matthiasm@3: // b[i] = m_dict[(256 * count + i) % (256 * 84)]; matthiasm@3: b[i] = f2.values[i]; matthiasm@3: sumb += b[i]; matthiasm@1: if (b[i] > 0) { matthiasm@1: some_b_greater_zero = true; matthiasm@1: } matthiasm@1: } matthiasm@1: matthiasm@1: // here's where the non-negative least squares algorithm calculates the note activation x matthiasm@1: matthiasm@1: vector chroma = vector(12, 0); matthiasm@1: vector basschroma = vector(12, 0); matthiasm@1: float currval; matthiasm@1: unsigned iSemitone = 0; matthiasm@1: matthiasm@1: if (some_b_greater_zero) { matthiasm@3: if (m_dictID == 1) { matthiasm@1: for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { matthiasm@1: currval = 0; matthiasm@3: currval += b[iNote + 1 + -1] * 0.5; matthiasm@3: currval += b[iNote + 1 + 0] * 1.0; matthiasm@3: currval += b[iNote + 1 + 1] * 0.5; matthiasm@1: f3.values.push_back(currval); matthiasm@1: chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; matthiasm@1: basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; matthiasm@1: iSemitone++; matthiasm@1: } matthiasm@1: matthiasm@1: } else { matthiasm@3: float x[84+1000]; matthiasm@3: for (int i = 1; i < 1084; ++i) x[i] = 1.0; matthiasm@10: vector signifIndex; matthiasm@10: int index=0; matthiasm@10: sumb /= 84.0; matthiasm@10: for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { matthiasm@10: float currval = 0; matthiasm@10: currval += b[iNote + 1 + -1]; matthiasm@10: currval += b[iNote + 1 + 0]; matthiasm@10: currval += b[iNote + 1 + 1]; matthiasm@10: if (currval > 0) signifIndex.push_back(index); matthiasm@10: f3.values.push_back(0); // fill the values, change later matthiasm@10: index++; matthiasm@10: } matthiasm@3: float rnorm; matthiasm@3: float w[84+1000]; matthiasm@3: float zz[84+1000]; matthiasm@3: int 
indx[84+1000]; matthiasm@1: int mode; matthiasm@10: int dictsize = 256*signifIndex.size(); matthiasm@10: // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; matthiasm@10: float *curr_dict = new float[dictsize]; matthiasm@10: for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { matthiasm@10: for (unsigned iBin = 0; iBin < 256; iBin++) { matthiasm@10: curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; matthiasm@10: } matthiasm@3: } matthiasm@10: nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); matthiasm@10: delete [] curr_dict; matthiasm@10: for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { matthiasm@10: f3.values[signifIndex[iNote]] = x[iNote]; matthiasm@3: // cerr << mode << endl; matthiasm@10: chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; matthiasm@10: basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; matthiasm@3: } matthiasm@1: } matthiasm@1: } matthiasm@10: matthiasm@12: matthiasm@12: if (m_doNormalizeChroma > 0) { matthiasm@12: float chromamax = *max_element(chroma.begin(), chroma.end()); matthiasm@12: for (int i = 0; i < chroma.size(); i++) { matthiasm@12: chroma[i] /= chromamax; matthiasm@12: } matthiasm@12: } matthiasm@12: f4.values = chroma; matthiasm@1: f5.values = basschroma; matthiasm@1: chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas matthiasm@1: f6.values = chroma; matthiasm@1: matthiasm@1: // local chord estimation matthiasm@1: vector currentChordSalience; matthiasm@1: float tempchordvalue = 0; matthiasm@1: float sumchordvalue = 0; matthiasm@9: matthiasm@1: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@1: tempchordvalue = 0; matthiasm@1: for (int iBin = 0; iBin < 12; iBin++) { matthiasm@9: tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; matthiasm@1: } matthiasm@1: for (int iBin = 
12; iBin < 24; iBin++) { matthiasm@9: tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; matthiasm@1: } matthiasm@1: sumchordvalue+=tempchordvalue; matthiasm@1: currentChordSalience.push_back(tempchordvalue); matthiasm@1: } matthiasm@1: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@1: currentChordSalience[iChord] /= sumchordvalue; matthiasm@1: } matthiasm@1: chordogram.push_back(currentChordSalience); matthiasm@1: matthiasm@1: fsOut[3].push_back(f3); matthiasm@1: fsOut[4].push_back(f4); matthiasm@1: fsOut[5].push_back(f5); matthiasm@1: fsOut[6].push_back(f6); matthiasm@1: count++; matthiasm@1: } matthiasm@10: cerr << "******* NNLS done *******" << endl; matthiasm@10: matthiasm@3: /* Simple chord estimation matthiasm@3: I just take the local chord estimates ("currentChordSalience") and average them over time, then matthiasm@3: take the maximum. Very simple, don't do this at home... matthiasm@3: */ matthiasm@3: count = 0; matthiasm@3: int halfwindowlength = m_inputSampleRate / m_stepSize; matthiasm@3: vector chordSequence; matthiasm@3: for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram matthiasm@3: vector temp = vector(nChord,0); matthiasm@3: scoreChordogram.push_back(temp); matthiasm@3: } matthiasm@4: for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { matthiasm@3: int startIndex = count + 1; matthiasm@3: int endIndex = count + 2 * halfwindowlength; matthiasm@10: matthiasm@10: float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); matthiasm@10: matthiasm@10: vector chordCandidates; matthiasm@10: for (unsigned iChord = 0; iChord < nChord-1; iChord++) { matthiasm@10: // float currsum = 0; matthiasm@10: // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { matthiasm@10: // currsum += chordogram[iFrame][iChord]; matthiasm@10: // } matthiasm@10: // if (currsum > chordThreshold) chordCandidates.push_back(iChord); 
matthiasm@10: for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { matthiasm@10: if (chordogram[iFrame][iChord] > chordThreshold) { matthiasm@10: chordCandidates.push_back(iChord); matthiasm@10: break; matthiasm@10: } matthiasm@10: } matthiasm@10: } matthiasm@10: chordCandidates.push_back(nChord-1); matthiasm@10: // cerr << chordCandidates.size() << endl; matthiasm@10: matthiasm@10: float maxval = 0; // will be the value of the most salient *chord change* in this frame matthiasm@4: float maxindex = 0; //... and the index thereof matthiasm@10: unsigned bestchordL = nChord-1; // index of the best "left" chord matthiasm@10: unsigned bestchordR = nChord-1; // index of the best "right" chord matthiasm@10: matthiasm@4: for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { matthiasm@3: // now find the max values on both sides of iWF matthiasm@3: // left side: matthiasm@3: float maxL = 0; matthiasm@3: unsigned maxindL = nChord-1; matthiasm@10: for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { matthiasm@10: unsigned iChord = chordCandidates[kChord]; matthiasm@3: float currsum = 0; matthiasm@3: for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { matthiasm@3: currsum += chordogram[count+iFrame][iChord]; matthiasm@3: } matthiasm@3: if (iChord == nChord-1) currsum *= 0.8; matthiasm@3: if (currsum > maxL) { matthiasm@3: maxL = currsum; matthiasm@3: maxindL = iChord; matthiasm@3: } matthiasm@3: } matthiasm@3: // right side: matthiasm@3: float maxR = 0; matthiasm@3: unsigned maxindR = nChord-1; matthiasm@10: for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { matthiasm@10: unsigned iChord = chordCandidates[kChord]; matthiasm@3: float currsum = 0; matthiasm@3: for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { matthiasm@3: currsum += chordogram[count+iFrame][iChord]; matthiasm@3: } matthiasm@3: if (iChord == nChord-1) currsum *= 0.8; matthiasm@3: if (currsum > maxR) { matthiasm@3: maxR = currsum; matthiasm@3: 
maxindR = iChord; matthiasm@3: } matthiasm@3: } matthiasm@3: if (maxL+maxR > maxval) { matthiasm@3: maxval = maxL+maxR; matthiasm@3: maxindex = iWF; matthiasm@3: bestchordL = maxindL; matthiasm@3: bestchordR = maxindR; matthiasm@3: } matthiasm@3: matthiasm@3: } matthiasm@3: // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; matthiasm@3: // add a score to every chord-frame-point that was part of a maximum matthiasm@3: for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { matthiasm@3: scoreChordogram[iFrame+count][bestchordL]++; matthiasm@3: } matthiasm@3: for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { matthiasm@3: scoreChordogram[iFrame+count][bestchordR]++; matthiasm@3: } matthiasm@3: count++; matthiasm@3: } matthiasm@10: cerr << "******* agent finished *******" << endl; matthiasm@3: count = 0; matthiasm@3: for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { matthiasm@3: float maxval = 0; // will be the value of the most salient chord in this frame matthiasm@3: float maxindex = 0; //... 
and the index thereof matthiasm@3: for (unsigned iChord = 0; iChord < nChord; iChord++) { matthiasm@3: if (scoreChordogram[count][iChord] > maxval) { matthiasm@3: maxval = scoreChordogram[count][iChord]; matthiasm@3: maxindex = iChord; matthiasm@4: // cerr << iChord << endl; matthiasm@3: } matthiasm@3: } matthiasm@3: chordSequence.push_back(maxindex); matthiasm@4: // cerr << "before modefilter, maxindex: " << maxindex << endl; matthiasm@3: count++; matthiasm@3: } matthiasm@10: cerr << "******* mode filter done *******" << endl; matthiasm@10: matthiasm@3: matthiasm@3: // mode filter on chordSequence matthiasm@3: count = 0; matthiasm@12: string oldChord = ""; matthiasm@3: for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { matthiasm@3: Feature f6 = *it; matthiasm@3: Feature f7; // chord estimate matthiasm@3: f7.hasTimestamp = true; matthiasm@3: f7.timestamp = f6.timestamp; matthiasm@3: vector chordCount = vector(nChord,0); matthiasm@3: int maxChordCount = 0; matthiasm@3: int maxChordIndex = nChord-1; matthiasm@12: string maxChord; matthiasm@4: int startIndex = max(count - halfwindowlength/2,0); matthiasm@4: int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); matthiasm@4: for (int i = startIndex; i < endIndex; i++) { matthiasm@4: chordCount[chordSequence[i]]++; matthiasm@4: if (chordCount[chordSequence[i]] > maxChordCount) { matthiasm@7: // cerr << "start index " << startIndex << endl; matthiasm@4: maxChordCount++; matthiasm@4: maxChordIndex = chordSequence[i]; matthiasm@12: maxChord = m_chordnames[maxChordIndex]; matthiasm@4: } matthiasm@4: } matthiasm@4: // chordSequence[count] = maxChordIndex; matthiasm@7: // cerr << maxChordIndex << endl; matthiasm@12: if (oldChord != maxChord) { matthiasm@12: oldChord = maxChord; matthiasm@3: matthiasm@9: // char buffer1 [50]; matthiasm@9: // if (maxChordIndex < nChord - 1) { matthiasm@9: // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); 
matthiasm@9: // } else { matthiasm@9: // sprintf(buffer1, "N"); matthiasm@9: // } matthiasm@9: // f7.label = buffer1; matthiasm@9: f7.label = m_chordnames[maxChordIndex]; matthiasm@3: fsOut[7].push_back(f7); matthiasm@3: } matthiasm@3: count++; matthiasm@3: } matthiasm@0: // // musicity matthiasm@0: // count = 0; matthiasm@0: // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 matthiasm@0: // vector musicityValue; matthiasm@0: // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { matthiasm@0: // Feature f4 = *it; matthiasm@0: // matthiasm@0: // int startIndex = max(count - musicitykernelwidth/2,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); matthiasm@0: // float chromasum = 0; matthiasm@0: // float diffsum = 0; matthiasm@0: // for (int k = 0; k < 12; k++) { matthiasm@0: // for (int i = startIndex + 1; i < endIndex; i++) { matthiasm@0: // chromasum += pow(fsOut[4][i].values[k],2); matthiasm@0: // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); matthiasm@0: // } matthiasm@0: // } matthiasm@0: // diffsum /= chromasum; matthiasm@0: // musicityValue.push_back(diffsum); matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: // matthiasm@0: // float musicityThreshold = 0.44; matthiasm@0: // if (m_stepSize == 4096) { matthiasm@0: // musicityThreshold = 0.74; matthiasm@0: // } matthiasm@0: // if (m_stepSize == 4410) { matthiasm@0: // musicityThreshold = 0.77; matthiasm@0: // } matthiasm@0: // matthiasm@0: // count = 0; matthiasm@0: // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { matthiasm@0: // Feature f4 = *it; matthiasm@0: // Feature f8; // musicity matthiasm@0: // Feature f9; // musicity segmenter matthiasm@0: // matthiasm@0: // f8.hasTimestamp = true; matthiasm@0: // f8.timestamp = f4.timestamp; matthiasm@0: // f9.hasTimestamp = true; matthiasm@0: // f9.timestamp = f4.timestamp; matthiasm@0: // matthiasm@0: // int 
startIndex = max(count - musicitykernelwidth/2,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); matthiasm@0: // int musicityCount = 0; matthiasm@0: // for (int i = startIndex; i <= endIndex; i++) { matthiasm@0: // if (musicityValue[i] > musicityThreshold) musicityCount++; matthiasm@0: // } matthiasm@0: // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); matthiasm@0: // matthiasm@0: // if (isSpeech) { matthiasm@0: // if (oldlabeltype != 2) { matthiasm@0: // f9.label = "Speech"; matthiasm@0: // fsOut[9].push_back(f9); matthiasm@0: // oldlabeltype = 2; matthiasm@0: // } matthiasm@0: // } else { matthiasm@0: // if (oldlabeltype != 1) { matthiasm@0: // f9.label = "Music"; matthiasm@0: // fsOut[9].push_back(f9); matthiasm@0: // oldlabeltype = 1; matthiasm@0: // } matthiasm@0: // } matthiasm@0: // f8.values.push_back(musicityValue[count]); matthiasm@0: // fsOut[8].push_back(f8); matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: return fsOut; matthiasm@0: matthiasm@0: } matthiasm@0: