matthiasm@0: matthiasm@0: #include "NNLSChroma.h" matthiasm@0: #include matthiasm@0: #include matthiasm@0: #include matthiasm@0: #include matthiasm@0: #include matthiasm@0: #include matthiasm@1: #include "nnls.h" matthiasm@0: // #include "cblas.h" matthiasm@0: #include "chorddict.cpp" matthiasm@0: using namespace std; matthiasm@0: matthiasm@0: const float sinvalue = 0.866025404; matthiasm@0: const float cosvalue = -0.5; matthiasm@0: const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; matthiasm@0: const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; matthiasm@0: const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; matthiasm@0: const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", matthiasm@0: "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; matthiasm@0: const vector hw(hammingwind, hammingwind+19); matthiasm@0: const int nNote = 256; matthiasm@0: matthiasm@0: /** Special Convolution matthiasm@0: special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the matthiasm@0: convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values matthiasm@0: as the first (last) valid convolution bin. matthiasm@0: **/ matthiasm@0: matthiasm@0: const bool debug_on = false; matthiasm@0: matthiasm@0: vector SpecialConvolution(vector convolvee, vector kernel) matthiasm@0: { matthiasm@0: float s; matthiasm@0: int m, n; matthiasm@0: int lenConvolvee = convolvee.size(); matthiasm@0: int lenKernel = kernel.size(); matthiasm@0: matthiasm@0: vector Z(256,0); matthiasm@0: assert(lenKernel % 2 != 0); // no exception handling !!! matthiasm@0: matthiasm@0: for (n = lenKernel - 1; n < lenConvolvee; n++) { matthiasm@0: s=0.0; matthiasm@0: for (m = 0; m < lenKernel; m++) { matthiasm@0: // cerr << "m = " << m << ", n = " << n << ", n-m = " << (n-m) << '\n'; matthiasm@0: s += convolvee[n-m] * kernel[m]; matthiasm@0: // if (debug_on) cerr << "--> s = " << s << '\n'; matthiasm@0: } matthiasm@0: // cerr << n - lenKernel/2 << endl; matthiasm@0: Z[n -lenKernel/2] = s; matthiasm@0: } matthiasm@0: matthiasm@0: // fill upper and lower pads matthiasm@0: for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; matthiasm@0: for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = matthiasm@0: Z[lenConvolvee - lenKernel/2 - 1]; matthiasm@0: return Z; matthiasm@0: } matthiasm@0: matthiasm@0: // vector FftBin2Frequency(vector binnumbers, int fs, int blocksize) matthiasm@0: // { matthiasm@0: // vector freq(binnumbers.size, 0.0); matthiasm@0: // for (unsigned i = 0; i < binnumbers.size; ++i) { matthiasm@0: // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize; matthiasm@0: // } matthiasm@0: // return freq; matthiasm@0: // } matthiasm@0: matthiasm@0: float cospuls(float x, float centre, float width) matthiasm@0: { matthiasm@0: float recipwidth = 1.0/width; matthiasm@0: if (abs(x - centre) <= 0.5 * width) { matthiasm@0: return cos((x-centre)*2*M_PI*recipwidth)*.5+.5; matthiasm@0: } matthiasm@0: return 0.0; matthiasm@0: } matthiasm@0: matthiasm@0: float pitchCospuls(float x, float centre, int binsperoctave) matthiasm@0: { matthiasm@0: float warpedf = -binsperoctave * (log2(centre) - log2(x)); matthiasm@0: float out = cospuls(warpedf, 0.0, 2.0); matthiasm@0: // now scale to correct for note density matthiasm@0: float c = log(2.0)/binsperoctave; matthiasm@0: if (x > 0) { matthiasm@0: out = out / (c * x); matthiasm@0: } else { matthiasm@0: out = 0; matthiasm@0: } matthiasm@0: return out; matthiasm@0: } matthiasm@0: matthiasm@0: bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { matthiasm@0: matthiasm@0: int binspersemitone = 3; // this must be 3 matthiasm@0: int minoctave = 0; // this must be 0 matthiasm@0: int maxoctave = 7; // this must be 7 matthiasm@1: int oversampling = 80; matthiasm@0: matthiasm@0: // linear frequency vector matthiasm@0: vector fft_f; matthiasm@0: for (int i = 0; i < blocksize/2; ++i) { matthiasm@0: fft_f.push_back(i * (fs * 1.0 / blocksize)); matthiasm@0: } matthiasm@0: float fft_width = fs * 2.0 / blocksize; matthiasm@0: matthiasm@0: // linear oversampled frequency vector matthiasm@0: vector oversampled_f; matthiasm@0: for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { matthiasm@0: oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); matthiasm@0: } matthiasm@0: matthiasm@0: // pitch-spaced frequency vector matthiasm@0: int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! matthiasm@0: int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! matthiasm@0: vector cq_f; matthiasm@0: float oob = 1.0/binspersemitone; // one over binspersemitone matthiasm@0: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 matthiasm@0: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); matthiasm@0: for (int i = minMIDI + 1; i < maxMIDI; ++i) { matthiasm@0: for (int k = -1; k < 2; ++k) { matthiasm@0: cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); matthiasm@0: } matthiasm@0: } matthiasm@0: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); matthiasm@0: cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); matthiasm@0: matthiasm@0: int nFFT = fft_f.size(); matthiasm@0: matthiasm@0: vector fft_activation; matthiasm@0: for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { matthiasm@0: float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); matthiasm@0: fft_activation.push_back(cosp); matthiasm@0: // cerr << cosp << endl; matthiasm@0: } matthiasm@0: matthiasm@0: float cq_activation; matthiasm@0: for (int iFFT = 1; iFFT < nFFT; ++iFFT) { matthiasm@0: // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) matthiasm@0: int curr_start = oversampling * iFFT - oversampling; matthiasm@0: int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here matthiasm@0: // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; matthiasm@0: for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { matthiasm@0: outmatrix[iFFT + nFFT * iCQ] = 0; matthiasm@1: if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood matthiasm@0: for (int iOS = curr_start; iOS < curr_end; ++iOS) { matthiasm@0: cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); matthiasm@0: // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; matthiasm@0: outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; matthiasm@0: } matthiasm@0: // if (iCQ == 1 || iCQ == 2) { matthiasm@0: // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; matthiasm@0: // } matthiasm@0: } matthiasm@0: } matthiasm@0: } matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@1: bool dictionaryMatrix(double* dm) { matthiasm@1: int binspersemitone = 3; // this must be 3 matthiasm@1: int minoctave = 0; // this must be 0 matthiasm@1: int maxoctave = 7; // this must be 7 matthiasm@1: float s_param = 0.6; matthiasm@1: matthiasm@1: // pitch-spaced frequency vector matthiasm@1: int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! matthiasm@1: int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! matthiasm@1: vector cq_f; matthiasm@1: float oob = 1.0/binspersemitone; // one over binspersemitone matthiasm@1: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 matthiasm@1: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); matthiasm@1: for (int i = minMIDI + 1; i < maxMIDI; ++i) { matthiasm@1: for (int k = -1; k < 2; ++k) { matthiasm@1: cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); matthiasm@1: } matthiasm@1: } matthiasm@1: cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); matthiasm@1: cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); matthiasm@1: matthiasm@1: // make out frequency vector matthiasm@1: vector out_f; matthiasm@1: matthiasm@1: float curr_f; matthiasm@1: float floatbin; matthiasm@1: float curr_amp; matthiasm@1: // now for every combination calculate the matrix element matthiasm@1: unsigned countElement = 0; matthiasm@1: for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { matthiasm@1: for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { matthiasm@1: curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; matthiasm@1: if (curr_f > cq_f[nNote-1]) break; matthiasm@1: floatbin = (iOut * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); matthiasm@1: curr_amp = pow(s_param,float(iHarm-1)); matthiasm@1: for (unsigned iNote = 0; iNote < nNote; ++iNote) { matthiasm@1: // cerr << dm[countElement] << endl; matthiasm@1: dm[countElement] = cospuls(iNote+1.0, floatbin, binspersemitone + 0.0); matthiasm@1: countElement++; matthiasm@1: } matthiasm@1: } matthiasm@1: } matthiasm@1: } matthiasm@1: matthiasm@0: matthiasm@0: NNLSChroma::NNLSChroma(float inputSampleRate) : matthiasm@0: Plugin(inputSampleRate), matthiasm@0: m_fl(0), matthiasm@0: m_blockSize(0), matthiasm@0: m_stepSize(0), matthiasm@0: m_lengthOfNoteIndex(0), matthiasm@0: m_meanTuning0(0), matthiasm@0: m_meanTuning1(0), matthiasm@0: m_meanTuning2(0), matthiasm@0: m_localTuning0(0), matthiasm@0: m_localTuning1(0), matthiasm@0: m_localTuning2(0), matthiasm@0: m_paling(0), matthiasm@0: m_localTuning(0), matthiasm@0: m_kernelValue(0), matthiasm@0: m_kernelFftIndex(0), matthiasm@0: m_kernelNoteIndex(0), matthiasm@1: m_dict(0), matthiasm@0: m_tuneLocal(false), matthiasm@0: m_dictID(0) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> NNLSChroma" << endl; matthiasm@1: m_dict = new double[nNote * 84]; matthiasm@1: dictionaryMatrix(m_dict); matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: NNLSChroma::~NNLSChroma() matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> ~NNLSChroma" << endl; matthiasm@1: delete [] m_dict; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getIdentifier() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getIdentifier" << endl; matthiasm@0: return "nnls_chroma"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getName() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getName" << endl; matthiasm@0: return "NNLS Chroma"; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getDescription() const matthiasm@0: { matthiasm@0: // Return something helpful here! matthiasm@0: if (debug_on) cerr << "--> getDescription" << endl; matthiasm@0: return ""; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getMaker() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMaker" << endl; matthiasm@0: // Your name here matthiasm@0: return "Matthias Mauch"; matthiasm@0: } matthiasm@0: matthiasm@0: int matthiasm@0: NNLSChroma::getPluginVersion() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPluginVersion" << endl; matthiasm@0: // Increment this each time you release a version that behaves matthiasm@0: // differently from the previous one matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getCopyright() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getCopyright" << endl; matthiasm@0: // This function is not ideally named. It does not necessarily matthiasm@0: // need to say who made the plugin -- getMaker does that -- but it matthiasm@0: // should indicate the terms under which it is distributed. For matthiasm@0: // example, "Copyright (year). All Rights Reserved", or "GPL" matthiasm@0: return "Copyright (2010). All rights reserved."; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::InputDomain matthiasm@0: NNLSChroma::getInputDomain() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getInputDomain" << endl; matthiasm@0: return FrequencyDomain; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getPreferredBlockSize() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPreferredBlockSize" << endl; matthiasm@0: return 16384; // 0 means "I can handle any block size" matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getPreferredStepSize() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPreferredStepSize" << endl; matthiasm@0: return 2048; // 0 means "anything sensible"; in practice this matthiasm@0: // means the same as the block size for TimeDomain matthiasm@0: // plugins, or half of it for FrequencyDomain plugins matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getMinChannelCount() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMinChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: size_t matthiasm@0: NNLSChroma::getMaxChannelCount() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getMaxChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::ParameterList matthiasm@0: NNLSChroma::getParameterDescriptors() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getParameterDescriptors" << endl; matthiasm@0: ParameterList list; matthiasm@0: matthiasm@0: ParameterDescriptor d0; matthiasm@0: d0.identifier = "notedict"; matthiasm@0: d0.name = "note dictionary"; matthiasm@0: d0.description = "Notes in different note dictionaries differ by their spectral shapes."; matthiasm@0: d0.unit = ""; matthiasm@0: d0.minValue = 0; matthiasm@1: d0.maxValue = 1; matthiasm@0: d0.defaultValue = 0; matthiasm@0: d0.isQuantized = true; matthiasm@0: d0.valueNames.push_back("s = 0.6"); matthiasm@1: // d0.valueNames.push_back("s = 0.9"); matthiasm@1: // d0.valueNames.push_back("s linearly spaced"); matthiasm@0: d0.valueNames.push_back("no NNLS"); matthiasm@0: d0.quantizeStep = 1.0; matthiasm@0: list.push_back(d0); matthiasm@0: matthiasm@0: ParameterDescriptor d1; matthiasm@0: d1.identifier = "tuningmode"; matthiasm@0: d1.name = "tuning mode"; matthiasm@0: d1.description = "Tuning can be performed locally or on the whole extraction area."; matthiasm@0: d1.unit = ""; matthiasm@0: d1.minValue = 0; matthiasm@0: d1.maxValue = 1; matthiasm@0: d1.defaultValue = 1; matthiasm@0: d1.isQuantized = true; matthiasm@0: d1.valueNames.push_back("global tuning"); matthiasm@0: d1.valueNames.push_back("local tuning"); matthiasm@0: d1.quantizeStep = 1.0; matthiasm@0: list.push_back(d1); matthiasm@0: matthiasm@0: ParameterDescriptor d2; matthiasm@0: d2.identifier = "paling"; matthiasm@0: d2.name = "spectral paling"; matthiasm@0: d2.description = "Spectral paling: no paling - 0; whitening - 1."; matthiasm@0: d2.unit = ""; matthiasm@0: d2.minValue = 0; matthiasm@0: d2.maxValue = 1; matthiasm@0: d2.defaultValue = 0.5; matthiasm@0: d2.isQuantized = false; matthiasm@0: // d1.valueNames.push_back("global tuning"); matthiasm@0: // d1.valueNames.push_back("local tuning"); matthiasm@0: // d1.quantizeStep = 0.1; matthiasm@0: list.push_back(d2); matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: float matthiasm@0: NNLSChroma::getParameter(string identifier) const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getParameter" << endl; matthiasm@0: if (identifier == "notedict") { matthiasm@0: return m_dictID; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "paling") { matthiasm@0: return m_paling; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: if (m_tuneLocal) { matthiasm@0: return 1.0; matthiasm@0: } else { matthiasm@0: return 0.0; matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@0: return 0; matthiasm@0: matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::setParameter(string identifier, float value) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> setParameter" << endl; matthiasm@0: if (identifier == "notedict") { matthiasm@0: m_dictID = (int) value; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "paling") { matthiasm@0: m_paling = value; matthiasm@0: } matthiasm@0: matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: m_tuneLocal = (value > 0) ? true : false; matthiasm@0: // cerr << "m_tuneLocal :" << m_tuneLocal << endl; matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::ProgramList matthiasm@0: NNLSChroma::getPrograms() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getPrograms" << endl; matthiasm@0: ProgramList list; matthiasm@0: matthiasm@0: // If you have no programs, return an empty list (or simply don't matthiasm@0: // implement this function or getCurrentProgram/selectProgram) matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: string matthiasm@0: NNLSChroma::getCurrentProgram() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getCurrentProgram" << endl; matthiasm@0: return ""; // no programs matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::selectProgram(string name) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> selectProgram" << endl; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: NNLSChroma::OutputList matthiasm@0: NNLSChroma::getOutputDescriptors() const matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getOutputDescriptors" << endl; matthiasm@0: OutputList list; matthiasm@0: matthiasm@0: // Make chroma names for the binNames property matthiasm@0: vector chromanames; matthiasm@0: vector bothchromanames; matthiasm@0: for (int iNote = 0; iNote < 24; iNote++) { matthiasm@0: bothchromanames.push_back(notenames[iNote]); matthiasm@0: if (iNote < 12) { matthiasm@0: chromanames.push_back(notenames[iNote]); matthiasm@0: } matthiasm@0: } matthiasm@0: matthiasm@1: // int nNote = 84; matthiasm@0: matthiasm@0: // See OutputDescriptor documentation for the possibilities here. matthiasm@0: // Every plugin must have at least one output. matthiasm@0: matthiasm@0: OutputDescriptor d0; matthiasm@0: d0.identifier = "tuning"; matthiasm@0: d0.name = "Tuning"; matthiasm@0: d0.description = "The concert pitch."; matthiasm@0: d0.unit = "Hz"; matthiasm@0: d0.hasFixedBinCount = true; matthiasm@0: d0.binCount = 0; matthiasm@0: d0.hasKnownExtents = true; matthiasm@0: d0.minValue = 427.47; matthiasm@0: d0.maxValue = 452.89; matthiasm@0: d0.isQuantized = false; matthiasm@0: d0.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@0: d0.hasDuration = false; matthiasm@0: list.push_back(d0); matthiasm@0: matthiasm@0: OutputDescriptor d1; matthiasm@0: d1.identifier = "logfreqspec"; matthiasm@0: d1.name = "Log-Frequency Spectrum"; matthiasm@0: d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; matthiasm@0: d1.unit = ""; matthiasm@0: d1.hasFixedBinCount = true; matthiasm@0: d1.binCount = nNote; matthiasm@0: d1.hasKnownExtents = false; matthiasm@0: d1.isQuantized = false; matthiasm@0: d1.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d1.hasDuration = false; matthiasm@0: d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d1); matthiasm@0: matthiasm@0: OutputDescriptor d2; matthiasm@0: d2.identifier = "tunedlogfreqspec"; matthiasm@0: d2.name = "Tuned Log-Frequency Spectrum"; matthiasm@0: d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; matthiasm@0: d2.unit = ""; matthiasm@0: d2.hasFixedBinCount = true; matthiasm@0: d2.binCount = 256; matthiasm@0: d2.hasKnownExtents = false; matthiasm@0: d2.isQuantized = false; matthiasm@0: d2.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d2.hasDuration = false; matthiasm@0: d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d2); matthiasm@0: matthiasm@0: OutputDescriptor d3; matthiasm@0: d3.identifier = "semitonespectrum"; matthiasm@0: d3.name = "Semitone Spectrum"; matthiasm@0: d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum."; matthiasm@0: d3.unit = ""; matthiasm@0: d3.hasFixedBinCount = true; matthiasm@0: d3.binCount = 84; matthiasm@0: d3.hasKnownExtents = false; matthiasm@0: d3.isQuantized = false; matthiasm@0: d3.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d3.hasDuration = false; matthiasm@0: d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d3); matthiasm@0: matthiasm@0: OutputDescriptor d4; matthiasm@0: d4.identifier = "chroma"; matthiasm@0: d4.name = "Chromagram"; matthiasm@0: d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range."; matthiasm@0: d4.unit = ""; matthiasm@0: d4.hasFixedBinCount = true; matthiasm@0: d4.binCount = 12; matthiasm@0: d4.binNames = chromanames; matthiasm@0: d4.hasKnownExtents = false; matthiasm@0: d4.isQuantized = false; matthiasm@0: d4.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d4.hasDuration = false; matthiasm@0: d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d4); matthiasm@0: matthiasm@0: OutputDescriptor d5; matthiasm@0: d5.identifier = "basschroma"; matthiasm@0: d5.name = "Bass Chromagram"; matthiasm@0: d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range."; matthiasm@0: d5.unit = ""; matthiasm@0: d5.hasFixedBinCount = true; matthiasm@0: d5.binCount = 12; matthiasm@0: d5.binNames = chromanames; matthiasm@0: d5.hasKnownExtents = false; matthiasm@0: d5.isQuantized = false; matthiasm@0: d5.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d5.hasDuration = false; matthiasm@0: d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d5); matthiasm@0: matthiasm@0: OutputDescriptor d6; matthiasm@0: d6.identifier = "bothchroma"; matthiasm@0: d6.name = "Chromagram and Bass Chromagram"; matthiasm@0: d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription."; matthiasm@0: d6.unit = ""; matthiasm@0: d6.hasFixedBinCount = true; matthiasm@0: d6.binCount = 24; matthiasm@0: d6.binNames = bothchromanames; matthiasm@0: d6.hasKnownExtents = false; matthiasm@0: d6.isQuantized = false; matthiasm@0: d6.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@0: d6.hasDuration = false; matthiasm@0: d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d6); matthiasm@0: matthiasm@0: OutputDescriptor d7; matthiasm@0: d7.identifier = "simplechord"; matthiasm@0: d7.name = "Simple Chord Estimate"; matthiasm@0: d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; matthiasm@0: d7.unit = ""; matthiasm@0: d7.hasFixedBinCount = true; matthiasm@0: d7.binCount = 0; matthiasm@0: d7.hasKnownExtents = false; matthiasm@0: d7.isQuantized = false; matthiasm@0: d7.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@0: d7.hasDuration = false; matthiasm@0: d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d7); matthiasm@0: matthiasm@1: // OutputDescriptor d8; matthiasm@1: // d8.identifier = "inconsistency"; matthiasm@1: // d8.name = "Harmonic inconsistency value"; matthiasm@1: // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high."; matthiasm@1: // d8.unit = ""; matthiasm@1: // d8.hasFixedBinCount = true; matthiasm@1: // d8.binCount = 1; matthiasm@1: // d8.hasKnownExtents = false; matthiasm@1: // d8.isQuantized = false; matthiasm@1: // d8.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@1: // d8.hasDuration = false; matthiasm@1: // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: // list.push_back(d8); matthiasm@1: // matthiasm@1: // OutputDescriptor d9; matthiasm@1: // d9.identifier = "inconsistencysegment"; matthiasm@1: // d9.name = "Harmonic inconsistency segmenter"; matthiasm@1: // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; matthiasm@1: // d9.unit = ""; matthiasm@1: // d9.hasFixedBinCount = true; matthiasm@1: // d9.binCount = 0; matthiasm@1: // d9.hasKnownExtents = true; matthiasm@1: // d9.minValue = 0.1; matthiasm@1: // d9.maxValue = 0.9; matthiasm@1: // d9.isQuantized = false; matthiasm@1: // d9.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@1: // d9.hasDuration = false; matthiasm@1: // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: // list.push_back(d9); matthiasm@1: // matthiasm@1: OutputDescriptor d10; matthiasm@1: d10.identifier = "localtuning"; matthiasm@1: d10.name = "Local tuning"; matthiasm@1: d10.description = ""; matthiasm@1: d10.unit = "Hz"; matthiasm@1: d10.hasFixedBinCount = true; matthiasm@1: d10.binCount = 1; matthiasm@1: d10.hasKnownExtents = true; matthiasm@1: d10.minValue = 427.47; matthiasm@1: d10.maxValue = 452.89; matthiasm@1: d10.isQuantized = false; matthiasm@1: d10.sampleType = OutputDescriptor::OneSamplePerStep; matthiasm@1: d10.hasDuration = false; matthiasm@1: d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@1: list.push_back(d10); matthiasm@1: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: bool matthiasm@0: NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { matthiasm@1: if (debug_on) { matthiasm@1: cerr << "--> initialise"; matthiasm@1: } matthiasm@1: matthiasm@0: if (channels < getMinChannelCount() || matthiasm@0: channels > getMaxChannelCount()) return false; matthiasm@0: m_blockSize = blockSize; matthiasm@0: m_stepSize = stepSize; matthiasm@0: frameCount = 0; matthiasm@0: int tempn = 256 * m_blockSize/2; matthiasm@1: cerr << "length of tempkernel : " << tempn << endl; matthiasm@1: float *tempkernel; matthiasm@1: matthiasm@1: tempkernel = new float[tempn]; matthiasm@1: matthiasm@0: logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); matthiasm@1: m_kernelValue.clear(); matthiasm@1: m_kernelFftIndex.clear(); matthiasm@1: m_kernelNoteIndex.clear(); matthiasm@1: int countNonzero = 0; matthiasm@0: for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix matthiasm@1: for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { matthiasm@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { matthiasm@1: m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); matthiasm@0: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { matthiasm@1: countNonzero++; matthiasm@0: } matthiasm@1: m_kernelFftIndex.push_back(iFFT); matthiasm@1: m_kernelNoteIndex.push_back(iNote); matthiasm@0: } matthiasm@0: } matthiasm@1: } matthiasm@1: cerr << "nonzero count : " << countNonzero << endl; matthiasm@1: delete [] tempkernel; matthiasm@1: matthiasm@1: matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void matthiasm@0: NNLSChroma::reset() matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> reset"; matthiasm@0: // Clear buffers, reset stored values, etc matthiasm@0: frameCount = 0; matthiasm@0: m_dictID = 0; matthiasm@1: m_kernelValue.clear(); matthiasm@1: m_kernelFftIndex.clear(); matthiasm@1: m_kernelNoteIndex.clear(); matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::FeatureSet matthiasm@0: NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> process" << endl; matthiasm@0: // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix matthiasm@0: matthiasm@0: frameCount++; matthiasm@0: float *magnitude = new float[m_blockSize/2]; matthiasm@0: matthiasm@0: Feature f10; // local tuning matthiasm@0: matthiasm@0: const float *fbuf = inputBuffers[0]; matthiasm@0: matthiasm@0: // make magnitude matthiasm@0: for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { matthiasm@0: magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + matthiasm@0: fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); matthiasm@1: // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: // note magnitude mapping using pre-calculated matrix matthiasm@0: float *nm = new float[nNote]; // note magnitude matthiasm@0: for (size_t iNote = 0; iNote < nNote; iNote++) { matthiasm@0: nm[iNote] = 0; // initialise as 0 matthiasm@0: } matthiasm@0: int binCount = 0; matthiasm@0: for (vector::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { matthiasm@0: // cerr << "."; matthiasm@1: nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; matthiasm@1: // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; matthiasm@0: binCount++; matthiasm@0: } matthiasm@1: // cerr << nm[20]; matthiasm@1: // cerr << endl; matthiasm@0: matthiasm@0: matthiasm@0: float one_over_N = 1.0/frameCount; matthiasm@0: // update means of complex tuning variables matthiasm@0: m_meanTuning0 *= float(frameCount-1)*one_over_N; matthiasm@0: m_meanTuning1 *= float(frameCount-1)*one_over_N; matthiasm@0: m_meanTuning2 *= float(frameCount-1)*one_over_N; matthiasm@0: matthiasm@0: for (int iTone = 0; iTone < 160; iTone = iTone + 3) { matthiasm@0: m_meanTuning0 += nm[iTone + 0]*one_over_N; matthiasm@0: m_meanTuning1 += nm[iTone + 1]*one_over_N; matthiasm@0: m_meanTuning2 += nm[iTone + 2]*one_over_N; matthiasm@0: m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0]; matthiasm@0: m_localTuning1 *= 0.99994; m_localTuning1 += nm[iTone + 1]; matthiasm@0: m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2]; matthiasm@0: } matthiasm@0: matthiasm@0: // if (m_tuneLocal) { matthiasm@0: // local tuning matthiasm@0: float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; matthiasm@0: float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; matthiasm@0: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); matthiasm@0: m_localTuning.push_back(normalisedtuning); matthiasm@0: float tuning440 = 440 * pow(2,normalisedtuning/12); matthiasm@0: f10.values.push_back(tuning440); matthiasm@0: // } matthiasm@0: matthiasm@0: Feature f1; // logfreqspec matthiasm@0: f1.hasTimestamp = true; matthiasm@0: f1.timestamp = timestamp; matthiasm@0: for (size_t iNote = 0; iNote < nNote; iNote++) { matthiasm@0: f1.values.push_back(nm[iNote]); matthiasm@0: } matthiasm@0: matthiasm@0: FeatureSet fs; matthiasm@0: fs[1].push_back(f1); matthiasm@0: fs[10].push_back(f10); matthiasm@0: matthiasm@0: // deletes matthiasm@0: delete[] magnitude; matthiasm@0: delete[] nm; matthiasm@0: matthiasm@0: m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures matthiasm@0: return fs; matthiasm@0: } matthiasm@0: matthiasm@0: NNLSChroma::FeatureSet matthiasm@0: NNLSChroma::getRemainingFeatures() matthiasm@0: { matthiasm@0: if (debug_on) cerr << "--> getRemainingFeatures" << endl; matthiasm@0: FeatureSet fsOut; matthiasm@0: // matthiasm@1: /** Calculate Tuning matthiasm@1: calculate tuning from (using the angle of the complex number defined by the matthiasm@1: cumulative mean real and imag values) matthiasm@1: **/ matthiasm@1: float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; matthiasm@1: float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; matthiasm@1: float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); matthiasm@1: float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); matthiasm@1: int intShift = floor(normalisedtuning * 3); matthiasm@1: float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this matthiasm@1: matthiasm@1: char buffer0 [50]; matthiasm@1: matthiasm@1: sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); matthiasm@1: matthiasm@1: // cerr << "normalisedtuning: " << normalisedtuning << '\n'; matthiasm@1: matthiasm@1: // push tuning to FeatureSet fsOut matthiasm@1: Feature f0; // tuning matthiasm@1: f0.hasTimestamp = true; matthiasm@1: f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; matthiasm@1: f0.label = buffer0; matthiasm@1: fsOut[0].push_back(f0); matthiasm@1: matthiasm@1: /** Tune Log-Frequency Spectrogram matthiasm@1: calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to matthiasm@1: perform linear interpolation on the existing log-frequency spectrogram (kinda f1). matthiasm@1: **/ matthiasm@1: matthiasm@1: float tempValue = 0; matthiasm@1: float dbThreshold = 0; // relative to the background spectrum matthiasm@1: float thresh = pow(10,dbThreshold/20); matthiasm@1: // cerr << "tune local ? " << m_tuneLocal << endl; matthiasm@1: int count = 0; matthiasm@1: matthiasm@1: for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { matthiasm@1: Feature f1 = *i; matthiasm@1: Feature f2; // tuned log-frequency spectrum matthiasm@1: f2.hasTimestamp = true; matthiasm@1: f2.timestamp = f1.timestamp; matthiasm@1: f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero matthiasm@1: matthiasm@1: if (m_tuneLocal) { matthiasm@1: intShift = floor(m_localTuning[count] * 3); matthiasm@1: intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this matthiasm@1: } matthiasm@1: matthiasm@1: // cerr << intShift << " " << intFactor << endl; matthiasm@1: matthiasm@1: for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins matthiasm@1: tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; matthiasm@1: f2.values.push_back(tempValue); matthiasm@1: } matthiasm@1: matthiasm@1: f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge matthiasm@1: vector runningmean = SpecialConvolution(f2.values,hw); matthiasm@1: vector runningstd; matthiasm@1: for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) matthiasm@1: runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); matthiasm@1: } matthiasm@1: runningstd = SpecialConvolution(runningstd,hw); // second step convolve matthiasm@1: for (int i = 0; i < 256; i++) { matthiasm@1: runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std matthiasm@1: if (runningstd[i] > 0) { matthiasm@1: // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? matthiasm@1: // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; matthiasm@1: f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? matthiasm@1: (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; matthiasm@1: } matthiasm@1: if (f2.values[i] < 0) { matthiasm@1: cerr << "ERROR: negative value in logfreq spectrum" << endl; matthiasm@1: } matthiasm@1: } matthiasm@1: fsOut[2].push_back(f2); matthiasm@1: count++; matthiasm@1: } matthiasm@1: matthiasm@1: /** Semitone spectrum and chromagrams matthiasm@1: Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum matthiasm@1: is inferred using a non-negative least squares algorithm. matthiasm@1: Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means matthiasm@1: bass and treble stacked onto each other). matthiasm@1: **/ matthiasm@1: // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n); matthiasm@1: matthiasm@1: vector > chordogram; matthiasm@1: vector oldchroma = vector(12,0); matthiasm@1: vector oldbasschroma = vector(12,0); matthiasm@1: count = 0; matthiasm@1: matthiasm@1: for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { matthiasm@1: Feature f2 = *it; // logfreq spectrum matthiasm@1: Feature f3; // semitone spectrum matthiasm@1: Feature f4; // treble chromagram matthiasm@1: Feature f5; // bass chromagram matthiasm@1: Feature f6; // treble and bass chromagram matthiasm@1: matthiasm@1: f3.hasTimestamp = true; matthiasm@1: f3.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f4.hasTimestamp = true; matthiasm@1: f4.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f5.hasTimestamp = true; matthiasm@1: f5.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: f6.hasTimestamp = true; matthiasm@1: f6.timestamp = f2.timestamp; matthiasm@1: matthiasm@1: double b[256]; matthiasm@1: matthiasm@1: bool some_b_greater_zero = false; matthiasm@1: for (int i = 0; i < 256; i++) { matthiasm@1: b[i] = f2.values[i]; matthiasm@1: if (b[i] > 0) { matthiasm@1: some_b_greater_zero = true; matthiasm@1: } matthiasm@1: } matthiasm@1: matthiasm@1: // here's where the non-negative least squares algorithm calculates the note activation x matthiasm@1: matthiasm@1: vector chroma = vector(12, 0); matthiasm@1: vector basschroma = vector(12, 0); matthiasm@1: float currval; matthiasm@1: unsigned iSemitone = 0; matthiasm@1: matthiasm@1: if (some_b_greater_zero) { matthiasm@1: if (m_dictID == 0) { matthiasm@1: for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { matthiasm@1: currval = 0; matthiasm@1: for (unsigned iBin = 0; iBin < 3; ++iBin) { matthiasm@1: currval += b[iNote + iBin]; matthiasm@1: } matthiasm@1: f3.values.push_back(currval); matthiasm@1: chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; matthiasm@1: basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; matthiasm@1: iSemitone++; matthiasm@1: } matthiasm@1: matthiasm@1: } else { matthiasm@1: double x[84+1] = {1.0}; matthiasm@1: double rnorm; matthiasm@1: double w[84+1]; matthiasm@1: double zz[84+1]; matthiasm@1: int indx[84+2]; matthiasm@1: int mode; matthiasm@1: matthiasm@1: nnls(m_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode); matthiasm@1: } matthiasm@1: } matthiasm@1: matthiasm@1: f4.values = chroma; matthiasm@1: f5.values = basschroma; matthiasm@1: chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas matthiasm@1: f6.values = chroma; matthiasm@1: matthiasm@1: // local chord estimation matthiasm@1: vector currentChordSalience; matthiasm@1: float tempchordvalue = 0; matthiasm@1: float sumchordvalue = 0; matthiasm@1: int nChord = nChorddict / 24; matthiasm@1: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@1: tempchordvalue = 0; matthiasm@1: for (int iBin = 0; iBin < 12; iBin++) { matthiasm@1: tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; matthiasm@1: } matthiasm@1: for (int iBin = 12; iBin < 24; iBin++) { matthiasm@1: tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; matthiasm@1: } matthiasm@1: sumchordvalue+=tempchordvalue; matthiasm@1: currentChordSalience.push_back(tempchordvalue); matthiasm@1: } matthiasm@1: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@1: currentChordSalience[iChord] /= sumchordvalue; matthiasm@1: } matthiasm@1: chordogram.push_back(currentChordSalience); matthiasm@1: matthiasm@1: fsOut[3].push_back(f3); matthiasm@1: fsOut[4].push_back(f4); matthiasm@1: fsOut[5].push_back(f5); matthiasm@1: fsOut[6].push_back(f6); matthiasm@1: // if (x) free(x); matthiasm@1: // delete[] b; matthiasm@1: count++; matthiasm@1: } matthiasm@0: // // cerr << m_stepSize << endl<< endl; matthiasm@0: // count = 0; matthiasm@0: // int kernelwidth = (49 * 2048) / m_stepSize; matthiasm@0: // int nChord = nChorddict / 24; matthiasm@0: // int musicitykernelwidth = (50 * 2048) / m_stepSize; matthiasm@0: // matthiasm@0: // /* Simple chord estimation matthiasm@0: // I just take the local chord estimates ("currentChordSalience") and average them over time, then matthiasm@0: // take the maximum. Very simple, don't do this at home... matthiasm@0: // */ matthiasm@0: // vector chordSequence; matthiasm@0: // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { matthiasm@0: // matthiasm@0: // int startIndex = max(count - kernelwidth/2 + 1,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1); matthiasm@0: // vector temp = vector(nChord,0); matthiasm@0: // for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@0: // float val = 0; matthiasm@0: // for (int i = startIndex; i < endIndex; i++) { matthiasm@0: // val += chordogram[i][iChord] * matthiasm@0: // (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weigthed sum (triangular window) matthiasm@0: // } matthiasm@0: // temp[iChord] = val; // sum matthiasm@0: // } matthiasm@0: // matthiasm@0: // // get maximum for "chord estimate" matthiasm@0: // matthiasm@0: // float bestChordValue = 0; matthiasm@0: // int bestChordIndex = nChord-1; // "no chord" is default matthiasm@0: // for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@0: // if (temp[iChord] > bestChordValue) { matthiasm@0: // bestChordValue = temp[iChord]; matthiasm@0: // bestChordIndex = iChord; matthiasm@0: // } matthiasm@0: // } matthiasm@0: // // cerr << bestChordIndex << endl; matthiasm@0: // chordSequence.push_back(bestChordIndex); matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: // // mode filter on chordSequence matthiasm@0: // count = 0; matthiasm@0: // int oldChordIndex = -1; matthiasm@0: // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { matthiasm@0: // Feature f6 = *it; matthiasm@0: // Feature f7; // chord estimate matthiasm@0: // matthiasm@0: // f7.hasTimestamp = true; matthiasm@0: // f7.timestamp = f6.timestamp; matthiasm@0: // matthiasm@0: // vector chordCount = vector(121,0); matthiasm@0: // matthiasm@0: // int maxChordCount = 0; matthiasm@0: // int maxChordIndex = 120; matthiasm@0: // int startIndex = max(count - kernelwidth/2,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1); matthiasm@0: // for (int i = startIndex; i < endIndex; i++) { matthiasm@0: // chordCount[chordSequence[i]]++; matthiasm@0: // if (chordCount[chordSequence[i]] > maxChordCount) { matthiasm@0: // maxChordCount++; matthiasm@0: // maxChordIndex = chordSequence[i]; matthiasm@0: // } matthiasm@0: // } matthiasm@0: // if (oldChordIndex != maxChordIndex) { matthiasm@0: // oldChordIndex = maxChordIndex; matthiasm@0: // matthiasm@0: // char buffer1 [50]; matthiasm@0: // if (maxChordIndex < nChord - 1) { matthiasm@0: // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); matthiasm@0: // } else { matthiasm@0: // sprintf(buffer1, "N"); matthiasm@0: // } matthiasm@0: // f7.label = buffer1; matthiasm@0: // fsOut[7].push_back(f7); matthiasm@0: // } matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: // // musicity matthiasm@0: // count = 0; matthiasm@0: // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 matthiasm@0: // vector musicityValue; matthiasm@0: // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { matthiasm@0: // Feature f4 = *it; matthiasm@0: // matthiasm@0: // int startIndex = max(count - musicitykernelwidth/2,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); matthiasm@0: // float chromasum = 0; matthiasm@0: // float diffsum = 0; matthiasm@0: // for (int k = 0; k < 12; k++) { matthiasm@0: // for (int i = startIndex + 1; i < endIndex; i++) { matthiasm@0: // chromasum += pow(fsOut[4][i].values[k],2); matthiasm@0: // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); matthiasm@0: // } matthiasm@0: // } matthiasm@0: // diffsum /= chromasum; matthiasm@0: // musicityValue.push_back(diffsum); matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: // matthiasm@0: // float musicityThreshold = 0.44; matthiasm@0: // if (m_stepSize == 4096) { matthiasm@0: // musicityThreshold = 0.74; matthiasm@0: // } matthiasm@0: // if (m_stepSize == 4410) { matthiasm@0: // musicityThreshold = 0.77; matthiasm@0: // } matthiasm@0: // matthiasm@0: // count = 0; matthiasm@0: // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { matthiasm@0: // Feature f4 = *it; matthiasm@0: // Feature f8; // musicity matthiasm@0: // Feature f9; // musicity segmenter matthiasm@0: // matthiasm@0: // f8.hasTimestamp = true; matthiasm@0: // f8.timestamp = f4.timestamp; matthiasm@0: // f9.hasTimestamp = true; matthiasm@0: // f9.timestamp = f4.timestamp; matthiasm@0: // matthiasm@0: // int startIndex = max(count - musicitykernelwidth/2,0); matthiasm@0: // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); matthiasm@0: // int musicityCount = 0; matthiasm@0: // for (int i = startIndex; i <= endIndex; i++) { matthiasm@0: // if (musicityValue[i] > musicityThreshold) musicityCount++; matthiasm@0: // } matthiasm@0: // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); matthiasm@0: // matthiasm@0: // if (isSpeech) { matthiasm@0: // if (oldlabeltype != 2) { matthiasm@0: // f9.label = "Speech"; matthiasm@0: // fsOut[9].push_back(f9); matthiasm@0: // oldlabeltype = 2; matthiasm@0: // } matthiasm@0: // } else { matthiasm@0: // if (oldlabeltype != 1) { matthiasm@0: // f9.label = "Music"; matthiasm@0: // fsOut[9].push_back(f9); matthiasm@0: // oldlabeltype = 1; matthiasm@0: // } matthiasm@0: // } matthiasm@0: // f8.values.push_back(musicityValue[count]); matthiasm@0: // fsOut[8].push_back(f8); matthiasm@0: // count++; matthiasm@0: // } matthiasm@0: return fsOut; matthiasm@0: matthiasm@0: } matthiasm@0: