Mercurial > hg > tipic
view src/PitchFilterbank.cpp @ 28:7b618e3f9a8b
Calculate accurate window boundaries (even where the effective hop size is non-integer)
author | Chris Cannam |
---|---|
date | Wed, 30 Sep 2015 11:43:40 +0100 |
parents | cb86b8b7ed90 |
children | 63959419587f |
line wrap: on
line source
#include "PitchFilterbank.h" #include "Resampler.h" #include "Filter.h" #include "delays.h" #include "filter-a.h" #include "filter-b.h" #include <deque> #include <map> #include <stdexcept> #include <iostream> #include <cmath> using namespace std; static const int HIGHEST_FILTER_INDEX_AT_882 = 38; // MIDI pitch 59 static const int HIGHEST_FILTER_INDEX_AT_4410 = 74; // MIDI pitch 95 static const int HIGHEST_FILTER_INDEX_AT_22050 = 87; // MIDI pitch 108 static const int HIGHEST_FILTER_INDEX = HIGHEST_FILTER_INDEX_AT_22050; class PitchFilterbank::D { public: D(int sampleRate, double tuningFrequency) : m_nfilters(HIGHEST_FILTER_INDEX + 1), m_sampleRate(sampleRate), m_tuningFrequency(tuningFrequency), m_blockNo(0) { // To handle a non-440Hz tuning frequency, we resample the // input and then adjust the output block timings // accordingly. For a tuning freq >440 we want to lower the // pitch of the input audio by slowing it down, therefore we // want to pretend that it came in at a lower sample rate than // it really did; for >440 the opposite applies. The effective // input sample rate is the rate at which we pretend the audio // was supplied. Rounding to the nearest int (because our // resampler only supports integer rates) gives around 0.1Hz // quantization close to 440Hz in 44.1kHz audio -- we could do // better by using multiples of our source and target sample // rates, but I think it probably isn't necessary. m_effectiveInputSampleRate = int(round(m_sampleRate * (440.0 / m_tuningFrequency))); //!!! todo: tuning frequency adjustment // * resample input by a small amount // * adjust output block timings by a small amount //!!! nb "we use forward-backward filtering such that the // resulting output signal has precisely zero phase distortion // and a magnitude modified by the square of the filter’s // magnitude response" -- we are not doing forward-backward // here & so need to adapt magnitudes in compensation to match // original m_resamplers[882] = new Resampler(m_effectiveInputSampleRate, 882); m_resamplers[4410] = new Resampler(m_effectiveInputSampleRate, 4410); m_resamplers[22050] = new Resampler(m_effectiveInputSampleRate, 22050); for (int i = 0; i < m_nfilters; ++i) { int ix = i + 20; int coeffs = sizeof(filter_a[0]) / sizeof(filter_a[0][0]); vector<double> a(filter_a[ix], filter_a[ix] + coeffs); vector<double> b(filter_b[ix], filter_b[ix] + coeffs); m_filters.push_back(new Filter({ a, b })); m_toCompensate.push_back(totalDelay(i)); } m_filtered.resize(m_nfilters); m_energies.resize(m_nfilters); } ~D() { for (auto f: m_filters) delete f; for (auto r: m_resamplers) delete r.second; } int getSampleRate() const { return m_sampleRate; } double getTuningFrequency() const { return m_tuningFrequency; } RealBlock process(const RealSequence &in) { for (auto r: m_resamplers) { m_resampled[r.first] = r.second->process(in.data(), in.size()); } for (int i = 0; i < m_nfilters; ++i) { int rate = filterRate(i); if (m_resampled.find(rate) == m_resampled.end()) { throw logic_error("No resampled output for rate of filter"); } pushFiltered(i, m_resampled[rate], false); } return energiesFromFiltered(false); } RealBlock getRemainingOutput() { for (auto r: m_resamplers) { RealSequence in(r.second->getLatency(), 0.0); m_resampled[r.first] = r.second->process(in.data(), in.size()); } for (int i = 0; i < m_nfilters; ++i) { int rate = filterRate(i); pushFiltered(i, m_resampled[rate], true); } return energiesFromFiltered(true); } struct WindowPosition { uint64_t start; int size; double factor; }; WindowPosition windowPosition(int block, int i) { //!!! todo make this known through api. these values are at 22050Hz uint64_t hop = 2205; double rate = filterRate(i); double topRate = 22050.0; double rateRatio = topRate / rate; double tuningRatio = m_sampleRate / double(m_effectiveInputSampleRate); double sizeRatio = tuningRatio / rateRatio; uint64_t start(round((hop * block) * sizeRatio)); int size(round((hop * 2) * sizeRatio)); // cerr << "block " << block << ", i " << i << ": start " << start << ", size " // << size << endl; return { start, size, rateRatio }; } RealBlock energiesFromFiltered(bool drain) { //!!! This is all quite inefficient -- we're counting //!!! everything twice. Since there is no actual window shape, //!!! isn't the overlap just averaging? for (int i = 0; i < m_nfilters; ++i) { WindowPosition here = windowPosition(m_blockNo, i); WindowPosition next = windowPosition(m_blockNo + 1, i); int n = here.size; int hop = next.start - here.start; unsigned int minReq = n; if (drain) minReq = hop; //!!! strictly, we don't actually need to store the //!!! filtered outputs since our overlapped windows are //!!! not shaped -- each one is the sum of two half-size //!!! windows, so we can just push the energies of those //!!! directly. that's a TODO while (m_filtered[i].size() >= minReq) { double energy = calculateEnergy(m_filtered[i], n, here.factor); m_energies[i].push_back(energy); m_filtered[i] = RealSequence(m_filtered[i].begin() + hop, m_filtered[i].end()); } } ++m_blockNo; int minCols = 0, maxCols = 0; for (int i = 0; i < m_nfilters; ++i) { int n = m_energies[i].size(); if (i == 0 || n < minCols) minCols = n; if (i == 0 || n > maxCols) maxCols = n; } RealBlock out; if (drain) { out.resize(maxCols); for (int j = 0; j < maxCols; ++j) { for (int i = 0; i < m_nfilters; ++i) { if (m_energies[i].size() == 0) { out[j].push_back(0.0); } else { out[j].push_back(m_energies[i][0]); m_energies[i].pop_front(); } } } } else { out.resize(minCols); for (int j = 0; j < minCols; ++j) { for (int i = 0; i < m_nfilters; ++i) { out[j].push_back(m_energies[i][0]); m_energies[i].pop_front(); } } } return out; } void pushFiltered(int i, const RealSequence &resampled, bool drain) { RealSequence in(resampled); if (drain) { RealSequence pad(filterDelay(i), 0.0); in.insert(in.end(), pad.begin(), pad.end()); } int n = in.size(); RealSequence filtered(n, 0.0); m_filters[i]->process(in.data(), filtered.data(), n); int pushStart = 0, pushCount = n; if (m_toCompensate[i] > 0) { pushCount = max(pushCount - m_toCompensate[i], 0); int compensating = n - pushCount; pushStart += compensating; m_toCompensate[i] -= compensating; if (m_toCompensate[i] < 0) { throw logic_error("Compensated for more latency than exists"); } } m_filtered[i].insert(m_filtered[i].end(), filtered.begin() + pushStart, filtered.begin() + pushStart + pushCount); } double calculateEnergy(const RealSequence &seq, int n, double factor) { double energy = 0.0; int sz = seq.size(); if (n > sz) n = sz; for (int i = 0; i < n; ++i) { energy += seq[i] * seq[i]; } return energy * factor; } private: int m_nfilters; int m_sampleRate; int m_effectiveInputSampleRate; double m_tuningFrequency; // This vector is initialised with 88 filter instances. // m_filters[n] (for n from 0 to 87) is for MIDI pitch 21+n, so we // span the MIDI range from pitch 21 to 108. Then m_filters[n] has // coefficients drawn from filter_a[20+n] and filter_b[20+n], and // has effective delay filter_delay[20+n]. vector<Filter *> m_filters; map<int, Resampler *> m_resamplers; // rate -> resampler map<int, RealSequence> m_resampled; vector<int> m_toCompensate; // latency remaining at start, per filter vector<RealSequence> m_filtered; vector<deque<double>> m_energies; int m_blockNo; Resampler *resamplerFor(int filterIndex) { int rate = filterRate(filterIndex); if (m_resamplers.find(rate) == m_resamplers.end()) { throw std::logic_error("Filter index has unknown or unexpected rate"); } return m_resamplers[rate]; } int filterRate(int filterIndex) const { if (filterIndex <= HIGHEST_FILTER_INDEX_AT_882) { return 882; } else if (filterIndex <= HIGHEST_FILTER_INDEX_AT_4410) { return 4410; } else { return 22050; } } int resamplerDelay(int filterIndex) const { return const_cast<D *>(this)->resamplerFor(filterIndex)->getLatency(); } int filterDelay(int filterIndex) const { return filter_delay[20 + filterIndex]; } int totalDelay(int filterIndex) const { return resamplerDelay(filterIndex) + filterDelay(filterIndex); } }; PitchFilterbank::PitchFilterbank(int sampleRate, double tuningFrequency) : m_d(new D(sampleRate, tuningFrequency)) { } PitchFilterbank::~PitchFilterbank() { delete m_d; } void PitchFilterbank::reset() { int rate = m_d->getSampleRate(); double freq = m_d->getTuningFrequency(); delete m_d; m_d = new D(rate, freq); } RealBlock PitchFilterbank::process(const RealSequence &in) { return m_d->process(in); } RealBlock PitchFilterbank::getRemainingOutput() { return m_d->getRemainingOutput(); } void PitchFilterbank::getPitchRange(int &minMidiPitch, int &maxMidiPitch) { minMidiPitch = 21; maxMidiPitch = 108; }