Chris@8: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@8: /* Chris@8: Permission is hereby granted, free of charge, to any person Chris@8: obtaining a copy of this software and associated documentation Chris@8: files (the "Software"), to deal in the Software without Chris@8: restriction, including without limitation the rights to use, copy, Chris@8: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@8: of the Software, and to permit persons to whom the Software is Chris@8: furnished to do so, subject to the following conditions: Chris@8: Chris@8: The above copyright notice and this permission notice shall be Chris@8: included in all copies or substantial portions of the Software. Chris@8: Chris@8: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@8: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@8: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@8: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR Chris@8: ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@8: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@8: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@8: */ Chris@8: Chris@8: #include "CepstrumPitchTracker.h" Chris@8: Chris@8: #include Chris@8: #include Chris@8: Chris@8: #include Chris@8: #include Chris@8: #include Chris@8: Chris@8: using std::string; Chris@12: using std::vector; Chris@12: Chris@13: CepstrumPitchTracker::Hypothesis::Hypothesis() Chris@12: { Chris@13: m_state = New; Chris@12: m_age = 0; Chris@12: } Chris@12: Chris@16: CepstrumPitchTracker::Hypothesis::~Hypothesis() Chris@16: { Chris@16: } Chris@16: Chris@12: bool Chris@12: CepstrumPitchTracker::Hypothesis::isWithinTolerance(Estimate s) Chris@12: { Chris@12: if (m_pending.empty()) { Chris@12: return true; Chris@12: } Chris@12: Estimate last = m_pending[m_pending.size()-1]; Chris@12: double r = s.freq / last.freq; Chris@12: int cents = lrint(1200.0 * (log(r) / log(2.0))); Chris@19: return (cents > -100 && cents < 100); Chris@12: } Chris@12: Chris@12: bool Chris@12: CepstrumPitchTracker::Hypothesis::isSatisfied() Chris@12: { Chris@20: if (m_pending.empty()) return false; Chris@20: Chris@20: double meanConfidence = 0.0; Chris@20: for (int i = 0; i < m_pending.size(); ++i) { Chris@20: meanConfidence += m_pending[i].confidence; Chris@20: } Chris@20: meanConfidence /= m_pending.size(); Chris@20: Chris@20: int lengthRequired = int(2.0 / meanConfidence + 0.5); Chris@20: std::cerr << "meanConfidence = " << meanConfidence << ", lengthRequired = " << lengthRequired << ", length = " << m_pending.size() << std::endl; Chris@20: Chris@20: return (m_pending.size() > lengthRequired); Chris@12: } Chris@12: Chris@13: void Chris@13: CepstrumPitchTracker::Hypothesis::advanceTime() Chris@13: { Chris@13: ++m_age; Chris@13: } Chris@13: Chris@12: bool Chris@12: CepstrumPitchTracker::Hypothesis::test(Estimate s) Chris@12: { Chris@13: bool accept = false; Chris@13: Chris@13: switch (m_state) { Chris@13: Chris@13: case New: Chris@13: m_state = Provisional; Chris@13: accept = true; Chris@13: break; Chris@13: Chris@13: case Provisional: Chris@13: if (m_age > 3) { Chris@13: m_state = Rejected; Chris@13: } else if (isWithinTolerance(s)) { Chris@13: accept = true; Chris@13: } Chris@13: break; Chris@13: Chris@13: case Satisfied: Chris@13: if (m_age > 3) { Chris@13: m_state = Expired; Chris@13: } else if (isWithinTolerance(s)) { Chris@13: accept = true; Chris@13: } Chris@13: break; Chris@13: Chris@13: case Rejected: Chris@13: break; Chris@13: Chris@13: case Expired: Chris@13: break; Chris@12: } Chris@12: Chris@13: if (accept) { Chris@13: m_pending.push_back(s); Chris@13: m_age = 0; Chris@13: if (m_state == Provisional && isSatisfied()) { Chris@13: m_state = Satisfied; Chris@12: } Chris@12: } Chris@12: Chris@19: return accept && (m_state == Satisfied); Chris@13: } Chris@12: Chris@12: CepstrumPitchTracker::Hypothesis::State Chris@12: CepstrumPitchTracker::Hypothesis::getState() Chris@12: { Chris@12: return m_state; Chris@12: } Chris@12: Chris@17: int Chris@17: CepstrumPitchTracker::Hypothesis::getPendingLength() Chris@17: { Chris@17: return m_pending.size(); Chris@17: } Chris@17: Chris@12: CepstrumPitchTracker::Hypothesis::Estimates Chris@12: CepstrumPitchTracker::Hypothesis::getAcceptedEstimates() Chris@12: { Chris@12: if (m_state == Satisfied || m_state == Expired) { Chris@12: return m_pending; Chris@12: } else { Chris@12: return Estimates(); Chris@12: } Chris@12: } Chris@12: Chris@16: void Chris@16: CepstrumPitchTracker::Hypothesis::addFeatures(FeatureList &fl) Chris@16: { Chris@16: for (int i = 0; i < m_pending.size(); ++i) { Chris@16: Feature f; Chris@16: f.hasTimestamp = true; Chris@16: f.timestamp = m_pending[i].time; Chris@16: f.values.push_back(m_pending[i].freq); Chris@16: fl.push_back(f); Chris@16: } Chris@16: } Chris@8: Chris@8: CepstrumPitchTracker::CepstrumPitchTracker(float inputSampleRate) : Chris@8: Plugin(inputSampleRate), Chris@8: m_channels(0), Chris@8: m_stepSize(256), Chris@8: m_blockSize(1024), Chris@8: m_fmin(50), Chris@8: m_fmax(1000), Chris@10: m_vflen(3), Chris@8: m_binFrom(0), Chris@8: m_binTo(0), Chris@20: m_bins(0) Chris@8: { Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::~CepstrumPitchTracker() Chris@8: { Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getIdentifier() const Chris@8: { Chris@8: return "cepstrum-pitch"; Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getName() const Chris@8: { Chris@8: return "Cepstrum Pitch Tracker"; Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getDescription() const Chris@8: { Chris@8: return "Estimate f0 of monophonic material using a cepstrum method."; Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getMaker() const Chris@8: { Chris@8: return "Chris Cannam"; Chris@8: } Chris@8: Chris@8: int Chris@8: CepstrumPitchTracker::getPluginVersion() const Chris@8: { Chris@8: // Increment this each time you release a version that behaves Chris@8: // differently from the previous one Chris@8: return 1; Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getCopyright() const Chris@8: { Chris@8: return "Freely redistributable (BSD license)"; Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::InputDomain Chris@8: CepstrumPitchTracker::getInputDomain() const Chris@8: { Chris@8: return FrequencyDomain; Chris@8: } Chris@8: Chris@8: size_t Chris@8: CepstrumPitchTracker::getPreferredBlockSize() const Chris@8: { Chris@8: return 1024; Chris@8: } Chris@8: Chris@8: size_t Chris@8: CepstrumPitchTracker::getPreferredStepSize() const Chris@8: { Chris@8: return 256; Chris@8: } Chris@8: Chris@8: size_t Chris@8: CepstrumPitchTracker::getMinChannelCount() const Chris@8: { Chris@8: return 1; Chris@8: } Chris@8: Chris@8: size_t Chris@8: CepstrumPitchTracker::getMaxChannelCount() const Chris@8: { Chris@8: return 1; Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::ParameterList Chris@8: CepstrumPitchTracker::getParameterDescriptors() const Chris@8: { Chris@8: ParameterList list; Chris@8: return list; Chris@8: } Chris@8: Chris@8: float Chris@8: CepstrumPitchTracker::getParameter(string identifier) const Chris@8: { Chris@8: return 0.f; Chris@8: } Chris@8: Chris@8: void Chris@8: CepstrumPitchTracker::setParameter(string identifier, float value) Chris@8: { Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::ProgramList Chris@8: CepstrumPitchTracker::getPrograms() const Chris@8: { Chris@8: ProgramList list; Chris@8: return list; Chris@8: } Chris@8: Chris@8: string Chris@8: CepstrumPitchTracker::getCurrentProgram() const Chris@8: { Chris@8: return ""; // no programs Chris@8: } Chris@8: Chris@8: void Chris@8: CepstrumPitchTracker::selectProgram(string name) Chris@8: { Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::OutputList Chris@8: CepstrumPitchTracker::getOutputDescriptors() const Chris@8: { Chris@8: OutputList outputs; Chris@8: Chris@8: int n = 0; Chris@8: Chris@8: OutputDescriptor d; Chris@8: Chris@8: d.identifier = "f0"; Chris@8: d.name = "Estimated f0"; Chris@8: d.description = "Estimated fundamental frequency"; Chris@8: d.unit = "Hz"; Chris@8: d.hasFixedBinCount = true; Chris@8: d.binCount = 1; Chris@8: d.hasKnownExtents = true; Chris@8: d.minValue = m_fmin; Chris@8: d.maxValue = m_fmax; Chris@8: d.isQuantized = false; Chris@8: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@8: d.sampleRate = (m_inputSampleRate / m_stepSize); Chris@8: d.hasDuration = false; Chris@8: outputs.push_back(d); Chris@8: Chris@8: return outputs; Chris@8: } Chris@8: Chris@8: bool Chris@8: CepstrumPitchTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@8: { Chris@8: if (channels < getMinChannelCount() || Chris@8: channels > getMaxChannelCount()) return false; Chris@8: Chris@8: // std::cerr << "CepstrumPitchTracker::initialise: channels = " << channels Chris@8: // << ", stepSize = " << stepSize << ", blockSize = " << blockSize Chris@8: // << std::endl; Chris@8: Chris@8: m_channels = channels; Chris@8: m_stepSize = stepSize; Chris@8: m_blockSize = blockSize; Chris@8: Chris@8: m_binFrom = int(m_inputSampleRate / m_fmax); Chris@8: m_binTo = int(m_inputSampleRate / m_fmin); Chris@8: Chris@8: if (m_binTo >= (int)m_blockSize / 2) { Chris@8: m_binTo = m_blockSize / 2 - 1; Chris@8: } Chris@8: Chris@8: m_bins = (m_binTo - m_binFrom) + 1; Chris@8: Chris@8: reset(); Chris@8: Chris@8: return true; Chris@8: } Chris@8: Chris@8: void Chris@8: CepstrumPitchTracker::reset() Chris@8: { Chris@8: } Chris@8: Chris@8: void Chris@20: CepstrumPitchTracker::filter(const double *cep, double *data) Chris@8: { Chris@8: for (int i = 0; i < m_bins; ++i) { Chris@10: double v = 0; Chris@10: int n = 0; Chris@10: // average according to the vertical filter length Chris@10: for (int j = -m_vflen/2; j <= m_vflen/2; ++j) { Chris@10: int ix = i + m_binFrom + j; Chris@10: if (ix >= 0 && ix < m_blockSize) { Chris@10: v += cep[ix]; Chris@10: ++n; Chris@10: } Chris@10: } Chris@20: data[i] = v / n; Chris@8: } Chris@11: } Chris@11: Chris@8: CepstrumPitchTracker::FeatureSet Chris@8: CepstrumPitchTracker::process(const float *const *inputBuffers, Vamp::RealTime timestamp) Chris@8: { Chris@8: FeatureSet fs; Chris@8: Chris@8: int bs = m_blockSize; Chris@8: int hs = m_blockSize/2 + 1; Chris@8: Chris@8: double *rawcep = new double[bs]; Chris@8: double *io = new double[bs]; Chris@8: double *logmag = new double[bs]; Chris@8: Chris@9: // The "inverse symmetric" method. Seems to be the most reliable Chris@8: Chris@8: for (int i = 0; i < hs; ++i) { Chris@8: Chris@8: double power = Chris@8: inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + Chris@8: inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; Chris@8: double mag = sqrt(power); Chris@8: Chris@8: double lm = log(mag + 0.00000001); Chris@8: Chris@9: logmag[i] = lm; Chris@9: if (i > 0) logmag[bs - i] = lm; Chris@8: } Chris@8: Chris@9: fft(bs, true, logmag, 0, rawcep, io); Chris@8: Chris@8: delete[] logmag; Chris@8: delete[] io; Chris@8: Chris@8: int n = m_bins; Chris@8: double *data = new double[n]; Chris@8: filter(rawcep, data); Chris@8: delete[] rawcep; Chris@8: Chris@11: double abstot = 0.0; Chris@11: Chris@11: for (int i = 0; i < n; ++i) { Chris@11: abstot += fabs(data[i]); Chris@11: } Chris@11: Chris@8: double maxval = 0.0; Chris@11: int maxbin = -1; Chris@8: Chris@8: for (int i = 0; i < n; ++i) { Chris@8: if (data[i] > maxval) { Chris@8: maxval = data[i]; Chris@8: maxbin = i; Chris@8: } Chris@8: } Chris@8: Chris@20: if (maxbin < 0) { Chris@20: delete[] data; Chris@20: return fs; Chris@20: } Chris@20: Chris@20: double nextPeakVal = 0.0; Chris@20: for (int i = 1; i+1 < n; ++i) { Chris@20: if (data[i] > data[i-1] && Chris@20: data[i] > data[i+1] && Chris@20: i != maxbin && Chris@20: data[i] > nextPeakVal) { Chris@20: nextPeakVal = data[i]; Chris@20: } Chris@20: } Chris@13: Chris@13: double peakfreq = m_inputSampleRate / (maxbin + m_binFrom); Chris@20: Chris@20: double confidence = 0.0; Chris@20: if (nextPeakVal != 0.0) { Chris@20: confidence = ((maxval / nextPeakVal) - 1.0) / 4.0; Chris@20: if (confidence > 1.0) confidence = 1.0; Chris@20: } Chris@20: Chris@13: Hypothesis::Estimate e; Chris@13: e.freq = peakfreq; Chris@13: e.time = timestamp; Chris@20: e.confidence = confidence; Chris@13: Chris@13: m_accepted.advanceTime(); Chris@16: Chris@13: for (int i = 0; i < m_possible.size(); ++i) { Chris@13: m_possible[i].advanceTime(); Chris@13: } Chris@13: Chris@16: if (!m_accepted.test(e)) { Chris@18: Chris@16: int candidate = -1; Chris@18: bool accepted = false; Chris@18: Chris@16: for (int i = 0; i < m_possible.size(); ++i) { Chris@16: if (m_possible[i].test(e)) { Chris@18: accepted = true; Chris@16: if (m_possible[i].getState() == Hypothesis::Satisfied) { Chris@16: candidate = i; Chris@16: } Chris@16: break; Chris@16: } Chris@16: } Chris@17: Chris@18: if (!accepted) { Chris@18: Hypothesis h; Chris@18: h.test(e); //!!! must succeed as h is new, so perhaps there should be a ctor for this Chris@18: m_possible.push_back(h); Chris@18: } Chris@18: Chris@16: if (m_accepted.getState() == Hypothesis::Expired) { Chris@16: m_accepted.addFeatures(fs[0]); Chris@17: } Chris@17: Chris@17: if (m_accepted.getState() == Hypothesis::Expired || Chris@17: m_accepted.getState() == Hypothesis::Rejected) { Chris@16: if (candidate >= 0) { Chris@16: m_accepted = m_possible[candidate]; Chris@16: } else { Chris@16: m_accepted = Hypothesis(); Chris@16: } Chris@16: } Chris@13: Chris@19: // reap rejected/expired hypotheses from possible list Chris@19: Hypotheses toReap = m_possible; Chris@19: m_possible.clear(); Chris@19: for (int i = 0; i < toReap.size(); ++i) { Chris@19: Hypothesis h = toReap[i]; Chris@19: if (h.getState() != Hypothesis::Rejected && Chris@19: h.getState() != Hypothesis::Expired) { Chris@19: m_possible.push_back(h); Chris@19: } Chris@19: } Chris@19: } Chris@19: Chris@20: std::cerr << "accepted length = " << m_accepted.getPendingLength() Chris@20: << ", state = " << m_accepted.getState() Chris@20: << ", hypothesis count = " << m_possible.size() << std::endl; Chris@17: Chris@8: delete[] data; Chris@8: return fs; Chris@8: } Chris@8: Chris@8: CepstrumPitchTracker::FeatureSet Chris@8: CepstrumPitchTracker::getRemainingFeatures() Chris@8: { Chris@8: FeatureSet fs; Chris@20: if (m_accepted.getState() == Hypothesis::Satisfied) { Chris@16: m_accepted.addFeatures(fs[0]); Chris@16: } Chris@8: return fs; Chris@8: } Chris@8: Chris@8: void Chris@8: CepstrumPitchTracker::fft(unsigned int n, bool inverse, Chris@8: double *ri, double *ii, double *ro, double *io) Chris@8: { Chris@8: if (!ri || !ro || !io) return; Chris@8: Chris@8: unsigned int bits; Chris@8: unsigned int i, j, k, m; Chris@8: unsigned int blockSize, blockEnd; Chris@8: Chris@8: double tr, ti; Chris@8: Chris@8: if (n < 2) return; Chris@8: if (n & (n-1)) return; Chris@8: Chris@8: double angle = 2.0 * M_PI; Chris@8: if (inverse) angle = -angle; Chris@8: Chris@8: for (i = 0; ; ++i) { Chris@8: if (n & (1 << i)) { Chris@8: bits = i; Chris@8: break; Chris@8: } Chris@8: } Chris@8: Chris@8: static unsigned int tableSize = 0; Chris@8: static int *table = 0; Chris@8: Chris@8: if (tableSize != n) { Chris@8: Chris@8: delete[] table; Chris@8: Chris@8: table = new int[n]; Chris@8: Chris@8: for (i = 0; i < n; ++i) { Chris@8: Chris@8: m = i; Chris@8: Chris@8: for (j = k = 0; j < bits; ++j) { Chris@8: k = (k << 1) | (m & 1); Chris@8: m >>= 1; Chris@8: } Chris@8: Chris@8: table[i] = k; Chris@8: } Chris@8: Chris@8: tableSize = n; Chris@8: } Chris@8: Chris@8: if (ii) { Chris@8: for (i = 0; i < n; ++i) { Chris@8: ro[table[i]] = ri[i]; Chris@8: io[table[i]] = ii[i]; Chris@8: } Chris@8: } else { Chris@8: for (i = 0; i < n; ++i) { Chris@8: ro[table[i]] = ri[i]; Chris@8: io[table[i]] = 0.0; Chris@8: } Chris@8: } Chris@8: Chris@8: blockEnd = 1; Chris@8: Chris@8: for (blockSize = 2; blockSize <= n; blockSize <<= 1) { Chris@8: Chris@8: double delta = angle / (double)blockSize; Chris@8: double sm2 = -sin(-2 * delta); Chris@8: double sm1 = -sin(-delta); Chris@8: double cm2 = cos(-2 * delta); Chris@8: double cm1 = cos(-delta); Chris@8: double w = 2 * cm1; Chris@8: double ar[3], ai[3]; Chris@8: Chris@8: for (i = 0; i < n; i += blockSize) { Chris@8: Chris@8: ar[2] = cm2; Chris@8: ar[1] = cm1; Chris@8: Chris@8: ai[2] = sm2; Chris@8: ai[1] = sm1; Chris@8: Chris@8: for (j = i, m = 0; m < blockEnd; j++, m++) { Chris@8: Chris@8: ar[0] = w * ar[1] - ar[2]; Chris@8: ar[2] = ar[1]; Chris@8: ar[1] = ar[0]; Chris@8: Chris@8: ai[0] = w * ai[1] - ai[2]; Chris@8: ai[2] = ai[1]; Chris@8: ai[1] = ai[0]; Chris@8: Chris@8: k = j + blockEnd; Chris@8: tr = ar[0] * ro[k] - ai[0] * io[k]; Chris@8: ti = ar[0] * io[k] + ai[0] * ro[k]; Chris@8: Chris@8: ro[k] = ro[j] - tr; Chris@8: io[k] = io[j] - ti; Chris@8: Chris@8: ro[j] += tr; Chris@8: io[j] += ti; Chris@8: } Chris@8: } Chris@8: Chris@8: blockEnd = blockSize; Chris@8: } Chris@8: } Chris@8: Chris@8: