Chris@3: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@3: /* Chris@3: Permission is hereby granted, free of charge, to any person Chris@3: obtaining a copy of this software and associated documentation Chris@3: files (the "Software"), to deal in the Software without Chris@3: restriction, including without limitation the rights to use, copy, Chris@3: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@3: of the Software, and to permit persons to whom the Software is Chris@3: furnished to do so, subject to the following conditions: Chris@3: Chris@3: The above copyright notice and this permission notice shall be Chris@3: included in all copies or substantial portions of the Software. Chris@3: Chris@3: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@3: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@3: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@3: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR Chris@3: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@3: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@3: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@3: */ Chris@3: Chris@3: #include "CepstrumPitchTracker.h" Chris@3: Chris@3: #include Chris@3: #include Chris@3: Chris@3: #include Chris@3: #include Chris@3: #include Chris@3: Chris@3: using std::string; Chris@7: using std::vector; Chris@7: Chris@8: CepstrumPitchTracker::Hypothesis::Hypothesis() Chris@7: { Chris@8: m_state = New; Chris@7: m_age = 0; Chris@7: } Chris@7: Chris@11: CepstrumPitchTracker::Hypothesis::~Hypothesis() Chris@11: { Chris@11: } Chris@11: Chris@7: bool Chris@7: CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) Chris@7: { Chris@7: if (m_pending.empty()) { Chris@7: return true; Chris@7: } Chris@7: Estimate last = m_pending[m_pending.size()-1]; Chris@7: double r = s.freq / last.freq; Chris@7: int cents = lrint(1200.0 * (log(r) / log(2.0))); Chris@7: return (cents > -200 && cents < 200); Chris@7: } Chris@7: Chris@7: bool Chris@7: CepstrumPitchTracker::Hypothesis::isSatisfied() Chris@7: { Chris@7: return (m_pending.size() > 2); Chris@7: } Chris@7: Chris@8: void Chris@8: CepstrumPitchTracker::Hypothesis::advanceTime() Chris@8: { Chris@8: ++m_age; Chris@8: } Chris@8: Chris@7: bool Chris@7: CepstrumPitchTracker::Hypothesis::test(Estimate s) Chris@7: { Chris@8: bool accept = false; Chris@8: Chris@8: switch (m_state) { Chris@8: Chris@8: case New: Chris@8: m_state = Provisional; Chris@8: accept = true; Chris@8: break; Chris@8: Chris@8: case Provisional: Chris@8: if (m_age > 3) { Chris@8: m_state = Rejected; Chris@8: } else if (isWithinTolerance(s)) { Chris@8: accept = true; Chris@8: } Chris@8: break; Chris@8: Chris@8: case Satisfied: Chris@8: if (m_age > 3) { Chris@8: m_state = Expired; Chris@8: } else if (isWithinTolerance(s)) { Chris@8: accept = true; Chris@8: } Chris@8: break; Chris@8: Chris@8: case Rejected: Chris@8: break; Chris@8: Chris@8: case Expired: Chris@8: break; Chris@7: } Chris@7: Chris@8: if (accept) { Chris@8: m_pending.push_back(s); Chris@8: m_age = 0; Chris@8: if (m_state == Provisional && isSatisfied()) { Chris@8: m_state = Satisfied; Chris@7: } Chris@7: } Chris@7: Chris@8: return accept; Chris@8: } Chris@7: Chris@7: CepstrumPitchTracker::Hypothesis::State Chris@7: CepstrumPitchTracker::Hypothesis::getState() Chris@7: { Chris@7: return m_state; Chris@7: } Chris@7: Chris@12: int Chris@12: CepstrumPitchTracker::Hypothesis::getPendingLength() Chris@12: { Chris@12: return m_pending.size(); Chris@12: } Chris@12: Chris@7: CepstrumPitchTracker::Hypothesis::Estimates Chris@7: CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() Chris@7: { Chris@7: if (m_state == Satisfied || m_state == Expired) { Chris@7: return m_pending; Chris@7: } else { Chris@7: return Estimates(); Chris@7: } Chris@7: } Chris@7: Chris@11: void Chris@11: CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl) Chris@11: { Chris@11: for (int i = 0; i < m_pending.size(); ++i) { Chris@11: Feature f; Chris@11: f.hasTimestamp = true; Chris@11: f.timestamp = m_pending[i].time; Chris@11: f.values.push_back(m_pending[i].freq); Chris@11: fl.push_back(f); Chris@11: } Chris@11: } Chris@3: Chris@3: CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) : Chris@3: Plugin(inputSampleRate), Chris@3: m_channels(0), Chris@3: m_stepSize(256), Chris@3: m_blockSize(1024), Chris@3: m_fmin(50), Chris@3: m_fmax(1000), Chris@5: m_histlen(1), Chris@5: m_vflen(3), Chris@3: m_binFrom(0), Chris@3: m_binTo(0), Chris@3: m_bins(0), Chris@6: m_history(0), Chris@6: m_prevpeak(0), Chris@6: m_prevprop(0) Chris@3: { Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::~CepstrumPitchTracker() Chris@3: { Chris@3: if (m_history) { Chris@3: for (int i = 0; i < m_histlen; ++i) { Chris@3: delete[] m_history[i]; Chris@3: } Chris@3: delete[] m_history; Chris@3: } Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getIdentifier() const Chris@3: { Chris@3: return "cepstrum-pitch"; Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getName() const Chris@3: { Chris@3: return "Cepstrum Pitch Tracker"; Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getDescription() const Chris@3: { Chris@3: return "Estimate f0 of monophonic material using a cepstrum method."; Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getMaker() const Chris@3: { Chris@3: return "Chris Cannam"; Chris@3: } Chris@3: Chris@3: int Chris@3: CepstrumPitchTracker::getPluginVersion() const Chris@3: { Chris@3: // Increment this each time you release a version that behaves Chris@3: // differently from the previous one Chris@3: return 1; Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getCopyright() const Chris@3: { Chris@3: return "Freely redistributable (BSD license)"; Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::InputDomain Chris@3: CepstrumPitchTracker::getInputDomain() const Chris@3: { Chris@3: return FrequencyDomain; Chris@3: } Chris@3: Chris@3: size_t Chris@3: CepstrumPitchTracker::getPreferredBlockSize() const Chris@3: { Chris@3: return 1024; Chris@3: } Chris@3: Chris@3: size_t Chris@3: CepstrumPitchTracker::getPreferredStepSize() const Chris@3: { Chris@3: return 256; Chris@3: } Chris@3: Chris@3: size_t Chris@3: CepstrumPitchTracker::getMinChannelCount() const Chris@3: { Chris@3: return 1; Chris@3: } Chris@3: Chris@3: size_t Chris@3: CepstrumPitchTracker::getMaxChannelCount() const Chris@3: { Chris@3: return 1; Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::ParameterList Chris@3: CepstrumPitchTracker::getParameterDescriptors() const Chris@3: { Chris@3: ParameterList list; Chris@3: return list; Chris@3: } Chris@3: Chris@3: float Chris@3: CepstrumPitchTracker::getParameter(string identifier) const Chris@3: { Chris@3: return 0.f; Chris@3: } Chris@3: Chris@3: void Chris@3: CepstrumPitchTracker::setParameter(string identifier, float value) Chris@3: { Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::ProgramList Chris@3: CepstrumPitchTracker::getPrograms() const Chris@3: { Chris@3: ProgramList list; Chris@3: return list; Chris@3: } Chris@3: Chris@3: string Chris@3: CepstrumPitchTracker::getCurrentProgram() const Chris@3: { Chris@3: return ""; // no programs Chris@3: } Chris@3: Chris@3: void Chris@3: CepstrumPitchTracker::selectProgram(string name) Chris@3: { Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::OutputList Chris@3: CepstrumPitchTracker::getOutputDescriptors() const Chris@3: { Chris@3: OutputList outputs; Chris@3: Chris@3: int n = 0; Chris@3: Chris@3: OutputDescriptor d; Chris@3: Chris@3: d.identifier = "f0"; Chris@3: d.name = "Estimated f0"; Chris@3: d.description = "Estimated fundamental frequency"; Chris@3: d.unit = "Hz"; Chris@3: d.hasFixedBinCount = true; Chris@3: d.binCount = 1; Chris@3: d.hasKnownExtents = true; Chris@3: d.minValue = m_fmin; Chris@3: d.maxValue = m_fmax; Chris@3: d.isQuantized = false; Chris@3: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@3: d.sampleRate = (m_inputSampleRate / m_stepSize); Chris@3: d.hasDuration = false; Chris@3: outputs.push_back(d); Chris@3: Chris@3: return outputs; Chris@3: } Chris@3: Chris@3: bool Chris@3: CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@3: { Chris@3: if (channels < getMinChannelCount() || Chris@3: channels > getMaxChannelCount()) return false; Chris@3: Chris@3: // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels Chris@3: // << ", stepSize = " << stepSize << ", blockSize = " << blockSize Chris@3: // << std::endl; Chris@3: Chris@3: m_channels = channels; Chris@3: m_stepSize = stepSize; Chris@3: m_blockSize = blockSize; Chris@3: Chris@3: m_binFrom = int(m_inputSampleRate / m_fmax); Chris@3: m_binTo = int(m_inputSampleRate / m_fmin); Chris@3: Chris@3: if (m_binTo >= (int)m_blockSize / 2) { Chris@3: m_binTo = m_blockSize / 2 - 1; Chris@3: } Chris@3: Chris@3: m_bins = (m_binTo - m_binFrom) + 1; Chris@3: Chris@3: m_history = new double *[m_histlen]; Chris@3: for (int i = 0; i < m_histlen; ++i) { Chris@3: m_history[i] = new double[m_bins]; Chris@3: } Chris@3: Chris@3: reset(); Chris@3: Chris@3: return true; Chris@3: } Chris@3: Chris@3: void Chris@3: CepstrumPitchTracker::reset() Chris@3: { Chris@3: for (int i = 0; i < m_histlen; ++i) { Chris@3: for (int j = 0; j < m_bins; ++j) { Chris@3: m_history[i][j] = 0.0; Chris@3: } Chris@3: } Chris@3: } Chris@3: Chris@3: void Chris@3: CepstrumPitchTracker::filter(const double *cep, double *result) Chris@3: { Chris@3: int hix = m_histlen - 1; // current history index Chris@3: Chris@3: // roll back the history Chris@3: if (m_histlen > 1) { Chris@3: double *oldest = m_history[0]; Chris@3: for (int i = 1; i < m_histlen; ++i) { Chris@3: m_history[i-1] = m_history[i]; Chris@3: } Chris@3: // and stick this back in the newest spot, to recycle Chris@3: m_history[hix] = oldest; Chris@3: } Chris@3: Chris@3: for (int i = 0; i < m_bins; ++i) { Chris@5: double v = 0; Chris@5: int n = 0; Chris@5: // average according to the vertical filter length Chris@5: for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { Chris@5: int ix = i + m_binFrom + j; Chris@5: if (ix >= 0 && ix < m_blockSize) { Chris@5: v += cep[ix]; Chris@5: ++n; Chris@5: } Chris@5: } Chris@5: m_history[hix][i] = v / n; Chris@3: } Chris@3: Chris@3: for (int i = 0; i < m_bins; ++i) { Chris@3: double mean = 0.0; Chris@3: for (int j = 0; j < m_histlen; ++j) { Chris@3: mean += m_history[j][i]; Chris@3: } Chris@3: mean /= m_histlen; Chris@3: result[i] = mean; Chris@3: } Chris@3: } Chris@3: Chris@6: double Chris@6: CepstrumPitchTracker::calculatePeakProportion(const double *data, double abstot, int n) Chris@6: { Chris@6: double aroundPeak = data[n]; Chris@6: double peakProportion = 0.0; Chris@6: Chris@6: int i = n - 1; Chris@6: while (i > 0 && data[i] <= data[i+1]) { Chris@6: aroundPeak += fabs(data[i]); Chris@6: --i; Chris@6: } Chris@6: i = n + 1; Chris@6: while (i < m_bins && data[i] <= data[i-1]) { Chris@6: aroundPeak += fabs(data[i]); Chris@6: ++i; Chris@6: } Chris@6: peakProportion = aroundPeak / abstot; Chris@6: Chris@6: return peakProportion; Chris@6: } Chris@6: Chris@6: bool Chris@6: CepstrumPitchTracker::acceptPeak(int n, double peakProportion) Chris@6: { Chris@6: bool accept = false; Chris@6: Chris@6: if (abs(n - m_prevpeak) < 10) { //!!! should depend on bin count Chris@6: accept = true; Chris@6: } else if (peakProportion > m_prevprop * 2) { Chris@6: accept = true; Chris@6: } Chris@6: Chris@6: return accept; Chris@6: } Chris@6: Chris@3: CepstrumPitchTracker::FeatureSet Chris@3: CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) Chris@3: { Chris@3: FeatureSet fs; Chris@3: Chris@3: int bs = m_blockSize; Chris@3: int hs = m_blockSize/2 + 1; Chris@3: Chris@3: double *rawcep = new double[bs]; Chris@3: double *io = new double[bs]; Chris@3: double *logmag = new double[bs]; Chris@3: Chris@4: // The "inverse symmetric" method. Seems to be the most reliable Chris@3: Chris@3: for (int i = 0; i < hs; ++i) { Chris@3: Chris@3: double power = Chris@3: inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + Chris@3: inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; Chris@3: double mag = sqrt(power); Chris@3: Chris@3: double lm = log(mag + 0.00000001); Chris@3: Chris@4: logmag[i] = lm; Chris@4: if (i > 0) logmag[bs - i] = lm; Chris@3: } Chris@3: Chris@4: fft(bs, true, logmag, 0, rawcep, io); Chris@3: Chris@3: delete[] logmag; Chris@3: delete[] io; Chris@3: Chris@3: int n = m_bins; Chris@3: double *data = new double[n]; Chris@3: filter(rawcep, data); Chris@3: delete[] rawcep; Chris@3: Chris@6: double abstot = 0.0; Chris@6: Chris@6: for (int i = 0; i < n; ++i) { Chris@6: abstot += fabs(data[i]); Chris@6: } Chris@6: Chris@3: double maxval = 0.0; Chris@6: int maxbin = -1; Chris@3: Chris@3: for (int i = 0; i < n; ++i) { Chris@3: if (data[i] > maxval) { Chris@3: maxval = data[i]; Chris@3: maxbin = i; Chris@3: } Chris@3: } Chris@3: Chris@8: if (maxbin < 0) return fs; Chris@8: Chris@8: double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); Chris@8: Hypothesis::Estimate e; Chris@8: e.freq = peakfreq; Chris@8: e.time = timestamp; Chris@8: Chris@8: m_accepted.advanceTime(); Chris@11: Chris@8: for (int i = 0; i < m_possible.size(); ++i) { Chris@8: m_possible[i].advanceTime(); Chris@8: } Chris@8: Chris@11: if (!m_accepted.test(e)) { Chris@13: Chris@11: int candidate = -1; Chris@13: bool accepted = false; Chris@13: Chris@11: for (int i = 0; i < m_possible.size(); ++i) { Chris@11: if (m_possible[i].test(e)) { Chris@13: accepted = true; Chris@11: if (m_possible[i].getState() == Hypothesis::Satisfied) { Chris@11: candidate = i; Chris@11: } Chris@11: break; Chris@11: } Chris@11: } Chris@12: Chris@13: if (!accepted) { Chris@13: Hypothesis h; Chris@13: h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this Chris@13: m_possible.push_back(h); Chris@13: } Chris@13: Chris@11: if (m_accepted.getState() == Hypothesis::Expired) { Chris@11: m_accepted.addFeatures(fs[0]); Chris@12: } Chris@12: Chris@12: if (m_accepted.getState() == Hypothesis::Expired || Chris@12: m_accepted.getState() == Hypothesis::Rejected) { Chris@11: if (candidate >= 0) { Chris@11: m_accepted = m_possible[candidate]; Chris@11: } else { Chris@11: m_accepted = Hypothesis(); Chris@11: } Chris@11: } Chris@8: Chris@12: std::cerr << "accepted length = " << m_accepted.getPendingLength() Chris@12: << ", state = " << m_accepted.getState() Chris@12: << ", hypothesis count = " << m_possible.size() << std::endl; Chris@12: Chris@11: //!!! and also need to reap rejected/expired hypotheses from the list Chris@11: } Chris@11: Chris@8: Chris@8: /* Chris@6: bool accepted = false; Chris@6: Chris@6: if (maxbin >= 0) { Chris@6: double pp = calculatePeakProportion(data, abstot, maxbin); Chris@6: if (acceptPeak(maxbin, pp)) { Chris@6: accepted = true; Chris@6: } else { Chris@6: // try a secondary peak Chris@6: maxval = 0.0; Chris@6: int secondbin = 0; Chris@6: for (int i = 1; i < n-1; ++i) { Chris@6: if (i != maxbin && Chris@6: data[i] > data[i-1] && Chris@6: data[i] > data[i+1] && Chris@6: data[i] > maxval) { Chris@6: maxval = data[i]; Chris@6: secondbin = i; Chris@6: } Chris@6: } Chris@6: double spp = calculatePeakProportion(data, abstot, secondbin); Chris@6: if (acceptPeak(secondbin, spp)) { Chris@6: maxbin = secondbin; Chris@6: pp = spp; Chris@6: accepted = true; Chris@6: } Chris@3: } Chris@6: if (accepted) { Chris@6: m_prevpeak = maxbin; Chris@6: m_prevprop = pp; Chris@3: } Chris@3: } Chris@8: */ Chris@3: // std::cerr << "peakProportion = " << peakProportion << std::endl; Chris@3: // std::cerr << "peak = " << m_inputSampleRate / (maxbin + m_binFrom) << std::endl; Chris@4: // std::cerr << "bins = " << m_bins << std::endl; Chris@3: Chris@6: // if (peakProportion >= (0.00006 * m_bins)) { Chris@11: /* Chris@6: if (accepted) { Chris@3: Feature f; Chris@3: f.hasTimestamp = true; Chris@3: f.timestamp = timestamp; Chris@3: f.values.push_back(m_inputSampleRate / (maxbin + m_binFrom)); Chris@3: fs[0].push_back(f); Chris@3: } Chris@11: */ Chris@3: delete[] data; Chris@3: return fs; Chris@3: } Chris@3: Chris@3: CepstrumPitchTracker::FeatureSet Chris@3: CepstrumPitchTracker::getRemainingFeatures() Chris@3: { Chris@3: FeatureSet fs; Chris@11: if (m_accepted.getState() != Hypothesis::New) { Chris@11: m_accepted.addFeatures(fs[0]); Chris@11: } Chris@3: return fs; Chris@3: } Chris@3: Chris@3: void Chris@3: CepstrumPitchTracker::fft(unsigned int n, bool inverse, Chris@3: double *ri, double *ii, double *ro, double *io) Chris@3: { Chris@3: if (!ri || !ro || !io) return; Chris@3: Chris@3: unsigned int bits; Chris@3: unsigned int i, j, k, m; Chris@3: unsigned int blockSize, blockEnd; Chris@3: Chris@3: double tr, ti; Chris@3: Chris@3: if (n < 2) return; Chris@3: if (n & (n-1)) return; Chris@3: Chris@3: double angle = 2.0 * M_PI; Chris@3: if (inverse) angle = -angle; Chris@3: Chris@3: for (i = 0; ; ++i) { Chris@3: if (n & (1 << i)) { Chris@3: bits = i; Chris@3: break; Chris@3: } Chris@3: } Chris@3: Chris@3: static unsigned int tableSize = 0; Chris@3: static int *table = 0; Chris@3: Chris@3: if (tableSize != n) { Chris@3: Chris@3: delete[] table; Chris@3: Chris@3: table = new int[n]; Chris@3: Chris@3: for (i = 0; i < n; ++i) { Chris@3: Chris@3: m = i; Chris@3: Chris@3: for (j = k = 0; j < bits; ++j) { Chris@3: k = (k << 1) | (m & 1); Chris@3: m >>= 1; Chris@3: } Chris@3: Chris@3: table[i] = k; Chris@3: } Chris@3: Chris@3: tableSize = n; Chris@3: } Chris@3: Chris@3: if (ii) { Chris@3: for (i = 0; i < n; ++i) { Chris@3: ro[table[i]] = ri[i]; Chris@3: io[table[i]] = ii[i]; Chris@3: } Chris@3: } else { Chris@3: for (i = 0; i < n; ++i) { Chris@3: ro[table[i]] = ri[i]; Chris@3: io[table[i]] = 0.0; Chris@3: } Chris@3: } Chris@3: Chris@3: blockEnd = 1; Chris@3: Chris@3: for (blockSize = 2; blockSize <= n; blockSize <<= 1) { Chris@3: Chris@3: double delta = angle / (double)blockSize; Chris@3: double sm2 = -sin(-2 * delta); Chris@3: double sm1 = -sin(-delta); Chris@3: double cm2 = cos(-2 * delta); Chris@3: double cm1 = cos(-delta); Chris@3: double w = 2 * cm1; Chris@3: double ar[3], ai[3]; Chris@3: Chris@3: for (i = 0; i < n; i += blockSize) { Chris@3: Chris@3: ar[2] = cm2; Chris@3: ar[1] = cm1; Chris@3: Chris@3: ai[2] = sm2; Chris@3: ai[1] = sm1; Chris@3: Chris@3: for (j = i, m = 0; m < blockEnd; j++, m++) { Chris@3: Chris@3: ar[0] = w * ar[1] - ar[2]; Chris@3: ar[2] = ar[1]; Chris@3: ar[1] = ar[0]; Chris@3: Chris@3: ai[0] = w * ai[1] - ai[2]; Chris@3: ai[2] = ai[1]; Chris@3: ai[1] = ai[0]; Chris@3: Chris@3: k = j + blockEnd; Chris@3: tr = ar[0] * ro[k] - ai[0] * io[k]; Chris@3: ti = ar[0] * io[k] + ai[0] * ro[k]; Chris@3: Chris@3: ro[k] = ro[j] - tr; Chris@3: io[k] = io[j] - ti; Chris@3: Chris@3: ro[j] += tr; Chris@3: io[j] += ti; Chris@3: } Chris@3: } Chris@3: Chris@3: blockEnd = blockSize; Chris@3: } Chris@3: } Chris@3: Chris@3: