Mercurial > hg > tipic
changeset 28:7b618e3f9a8b
Calculate accurate window boundaries (even where the effective hop size is non-integer)
author | Chris Cannam |
---|---|
date | Wed, 30 Sep 2015 11:43:40 +0100 |
parents | cb86b8b7ed90 |
children | 63959419587f |
files | src/PitchFilterbank.cpp |
diffstat | 1 files changed, 51 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/src/PitchFilterbank.cpp Wed Sep 30 10:26:24 2015 +0100 +++ b/src/PitchFilterbank.cpp Wed Sep 30 11:43:40 2015 +0100 @@ -28,19 +28,23 @@ D(int sampleRate, double tuningFrequency) : m_nfilters(HIGHEST_FILTER_INDEX + 1), m_sampleRate(sampleRate), - m_tuningFrequency(tuningFrequency) + m_tuningFrequency(tuningFrequency), + m_blockNo(0) { - // To handle a non-440 tuning frequency, we resample the input - // by this tuning ratio and then adjust the output block - // timings accordingly. Ratio is calculated on the basis that - // for tuning freq >440 we want to lower the pitch of the - // input audio by slowing it down, therefore we want to - // pretend that it came in at a lower sample rate than it - // really did, and for >440 the opposite applies. The - // effective input sample rate is the rate at which we pretend - // the audio was supplied. - m_tuningRatio = 440.0 / m_tuningFrequency; - m_effectiveInputSampleRate = int(round(m_sampleRate * m_tuningRatio)); + // To handle a non-440Hz tuning frequency, we resample the + // input and then adjust the output block timings + // accordingly. For a tuning freq >440 we want to lower the + // pitch of the input audio by slowing it down, therefore we + // want to pretend that it came in at a lower sample rate than + // it really did; for >440 the opposite applies. The effective + // input sample rate is the rate at which we pretend the audio + // was supplied. Rounding to the nearest int (because our + // resampler only supports integer rates) gives around 0.1Hz + // quantization close to 440Hz in 44.1kHz audio -- we could do + // better by using multiples of our source and target sample + // rates, but I think it probably isn't necessary. + m_effectiveInputSampleRate = + int(round(m_sampleRate * (440.0 / m_tuningFrequency))); //!!! todo: tuning frequency adjustment // * resample input by a small amount @@ -109,11 +113,34 @@ return energiesFromFiltered(true); } + + struct WindowPosition { + uint64_t start; + int size; + double factor; + }; + + WindowPosition windowPosition(int block, int i) { + + //!!! todo make this known through api. these values are at 22050Hz + uint64_t hop = 2205; + + double rate = filterRate(i); + double topRate = 22050.0; + double rateRatio = topRate / rate; + double tuningRatio = m_sampleRate / double(m_effectiveInputSampleRate); + double sizeRatio = tuningRatio / rateRatio; + + uint64_t start(round((hop * block) * sizeRatio)); + int size(round((hop * 2) * sizeRatio)); + +// cerr << "block " << block << ", i " << i << ": start " << start << ", size " +// << size << endl; + + return { start, size, rateRatio }; + } RealBlock energiesFromFiltered(bool drain) { - - //!!! todo make this known through api. these values are at 22050Hz - int windowSize = 4410; //!!! This is all quite inefficient -- we're counting //!!! everything twice. Since there is no actual window shape, @@ -121,11 +148,11 @@ for (int i = 0; i < m_nfilters; ++i) { - double factor = 22050.0 / filterRate(i); - //!!! Problem -- this is not an integer, for - //!!! fs=882 (it's 176.4) - int n = windowSize / factor; - int hop = n / 2; + WindowPosition here = windowPosition(m_blockNo, i); + WindowPosition next = windowPosition(m_blockNo + 1, i); + + int n = here.size; + int hop = next.start - here.start; unsigned int minReq = n; if (drain) minReq = hop; @@ -137,13 +164,15 @@ //!!! directly. that's a TODO while (m_filtered[i].size() >= minReq) { - double energy = calculateEnergy(m_filtered[i], n, factor); + double energy = calculateEnergy(m_filtered[i], n, here.factor); m_energies[i].push_back(energy); m_filtered[i] = RealSequence(m_filtered[i].begin() + hop, m_filtered[i].end()); } } + ++m_blockNo; + int minCols = 0, maxCols = 0; for (int i = 0; i < m_nfilters; ++i) { int n = m_energies[i].size(); @@ -222,7 +251,6 @@ int m_sampleRate; int m_effectiveInputSampleRate; double m_tuningFrequency; - double m_tuningRatio; // This vector is initialised with 88 filter instances. // m_filters[n] (for n from 0 to 87) is for MIDI pitch 21+n, so we @@ -236,6 +264,7 @@ vector<int> m_toCompensate; // latency remaining at start, per filter vector<RealSequence> m_filtered; vector<deque<double>> m_energies; + int m_blockNo; Resampler *resamplerFor(int filterIndex) { int rate = filterRate(filterIndex);