view src/PitchFilterbank.cpp @ 16:68fe8789b7ef

Comment
author Chris Cannam
date Tue, 18 Aug 2015 11:13:38 +0100
parents 368bb5a1b4bf
children c785eaaeac40
line wrap: on
line source

#include "PitchFilterbank.h"

#include "Resampler.h"
#include "Filter.h"

#include "delays.h"
#include "filter-a.h"
#include "filter-b.h"

#include <deque>
#include <map>
#include <stdexcept>
#include <iostream>

using namespace std;

static const int HIGHEST_FILTER_INDEX_AT_882 = 38;   // MIDI pitch 59
static const int HIGHEST_FILTER_INDEX_AT_4410 = 74;  // MIDI pitch 95
static const int HIGHEST_FILTER_INDEX_AT_22050 = 87; // MIDI pitch 108
static const int HIGHEST_FILTER_INDEX = HIGHEST_FILTER_INDEX_AT_22050;

class PitchFilterbank::D
{
public:
    D(int sampleRate, float tuningFrequency) :
	m_nfilters(HIGHEST_FILTER_INDEX + 1),
	m_sampleRate(sampleRate),
	m_tuningFrequency(tuningFrequency)
    {
	//!!! todo: tuning frequency adjustment
	// * resample input by a small amount
	// * adjust output block timings by a small amount
	
	m_resamplers[882] = new Resampler(sampleRate, 882);
	m_resamplers[4410] = new Resampler(sampleRate, 4410);
	m_resamplers[22050] = new Resampler(sampleRate, 22050);
	
	for (int i = 0; i < m_nfilters; ++i) {
	    int ix = i + 20;
	    int coeffs = sizeof(filter_a[0]) / sizeof(filter_a[0][0]);
	    vector<double> a(filter_a[ix], filter_a[ix] + coeffs);
	    vector<double> b(filter_b[ix], filter_b[ix] + coeffs);
	    m_filters.push_back(new Filter({ a, b }));
	    m_toCompensate.push_back(totalDelay(i));
	}

	m_filtered.resize(m_nfilters);
	m_energies.resize(m_nfilters);
    }
	
    ~D() {
	for (auto f: m_filters) delete f;
	for (auto r: m_resamplers) delete r.second;
    }

    int getSampleRate() const { return m_sampleRate; }
    float getTuningFrequency() const { return m_tuningFrequency; }
    
    /// A series of real-valued samples ordered in time.
    typedef vector<double> RealSequence;

    /// A series of real-valued samples ordered by bin (frequency or similar).
    typedef vector<double> RealColumn;

    /// A matrix of real-valued samples, indexed by time then bin number.
    typedef vector<RealColumn> RealBlock;

    RealBlock process(const RealSequence &in) {

	for (auto r: m_resamplers) {
	    m_resampled[r.first] = r.second->process(in.data(), in.size());
	}

	for (int i = 0; i < m_nfilters; ++i) {
	    int rate = filterRate(i);
	    if (m_resampled.find(rate) == m_resampled.end()) {
		throw logic_error("No resampled output for rate of filter");
	    }
	    pushFiltered(i, m_resampled[rate], false);
	}

	return energiesFromFiltered(false);
    }

    RealBlock getRemainingOutput() {

	for (auto r: m_resamplers) {
	    RealSequence in(r.second->getLatency(), 0.0);
	    m_resampled[r.first] = r.second->process(in.data(), in.size());
	}

	for (int i = 0; i < m_nfilters; ++i) {
	    int rate = filterRate(i);
	    pushFiltered(i, m_resampled[rate], true);
	}
	
	return energiesFromFiltered(true);
    }
    
    RealBlock energiesFromFiltered(bool drain) {
	
	//!!! todo make this known through api. these values are at 22050Hz
	int windowSize = 4410;

	//!!! This is all quite inefficient -- we're counting
	//!!! everything twice. Since there is no actual window shape,
	//!!! isn't the overlap just averaging?

	for (int i = 0; i < m_nfilters; ++i) {

	    double factor = 22050.0 / filterRate(i);
	    //!!! Problem -- this is not an integer, for
	    //!!! fs=882 (it's 176.4)
	    int n = windowSize / factor;
	    int hop = n / 2;

	    unsigned int minReq = n;
	    if (drain) minReq = hop;

	    //!!! strictly, we don't actually need to store the
	    //!!! filtered outputs since our overlapped windows are
	    //!!! not shaped -- each one is the sum of two half-size
	    //!!! windows, so we can just push the energies of those
	    //!!! directly. that's a TODO
	    
	    while (m_filtered[i].size() >= minReq) {
		double energy = calculateEnergy(m_filtered[i], n, factor);
		m_energies[i].push_back(energy);
		m_filtered[i] = RealSequence(m_filtered[i].begin() + hop,
					     m_filtered[i].end());
	    }
	}

	int minCols = 0, maxCols = 0;
	for (int i = 0; i < m_nfilters; ++i) {
	    int n = m_energies[i].size();
	    if (i == 0 || n < minCols) minCols = n;
	    if (i == 0 || n > maxCols) maxCols = n;
	}

	RealBlock out;
	
	if (drain) {
	    out.resize(maxCols);
	    for (int j = 0; j < maxCols; ++j) {
		for (int i = 0; i < m_nfilters; ++i) {
		    if (m_energies[i].size() == 0) {
			out[j].push_back(0.0);
		    } else {
			out[j].push_back(m_energies[i][0]);
			m_energies[i].pop_front();
		    }
		}
	    }
	} else {
	    out.resize(minCols);
	    for (int j = 0; j < minCols; ++j) {
		for (int i = 0; i < m_nfilters; ++i) {
		    out[j].push_back(m_energies[i][0]);
		    m_energies[i].pop_front();
		}
	    }
	}

	return out;
    }
    
    void pushFiltered(int i, const RealSequence &resampled, bool drain) {

	RealSequence in(resampled);
	if (drain) {
	    RealSequence pad(filterDelay(i), 0.0);
	    in.insert(in.end(), pad.begin(), pad.end());
	}
	
	int n = in.size();
	RealSequence filtered(n, 0.0);
	m_filters[i]->process(in.data(), filtered.data(), n);

	int pushStart = 0, pushCount = n;

	if (m_toCompensate[i] > 0) {
	    pushCount = max(pushCount - m_toCompensate[i], 0);
	    int compensating = n - pushCount;
	    pushStart += compensating;
	    m_toCompensate[i] -= compensating;
	    if (m_toCompensate[i] < 0) {
		throw logic_error("Compensated for more latency than exists");
	    }
	}
	
	m_filtered[i].insert(m_filtered[i].end(),
			     filtered.begin() + pushStart,
			     filtered.begin() + pushStart + pushCount);
    }	

    double calculateEnergy(const RealSequence &seq, int n, double factor) {
	double energy = 0.0;
	int sz = seq.size();
	if (n > sz) n = sz;
	for (int i = 0; i < n; ++i) {
	    energy += seq[i] * seq[i];
	}
	return energy * factor;
    }
    
private:
    int m_nfilters;
    int m_sampleRate;
    float m_tuningFrequency;

    // This vector is initialised with 88 filter instances.
    // m_filters[n] (for n from 0 to 87) is for MIDI pitch 21+n, so we
    // span the MIDI range from pitch 21 to 108. Then m_filters[n] has
    // coefficients drawn from filter_a[20+n] and filter_b[20+n], and
    // has effective delay filter_delay[20+n].
    vector<Filter *> m_filters;

    map<int, Resampler *> m_resamplers; // rate -> resampler
    map<int, RealSequence> m_resampled;
    vector<int> m_toCompensate; // latency remaining at start, per filter
    vector<RealSequence> m_filtered;
    vector<deque<double>> m_energies;

    Resampler *resamplerFor(int filterIndex) {
	int rate = filterRate(filterIndex);
	if (m_resamplers.find(rate) == m_resamplers.end()) {
	    throw std::logic_error("Filter index has unknown or unexpected rate");
	}
	return m_resamplers[rate];
    }

    int filterRate(int filterIndex) const {
	if (filterIndex <= HIGHEST_FILTER_INDEX_AT_882) {
	    return 882;
	} else if (filterIndex <= HIGHEST_FILTER_INDEX_AT_4410) {
	    return 4410;
	} else {
	    return 22050;
	}
    }
    int resamplerDelay(int filterIndex) const {
	return const_cast<D *>(this)->resamplerFor(filterIndex)->getLatency();
    }
    int filterDelay(int filterIndex) const {
	return filter_delay[20 + filterIndex];
    }
    int totalDelay(int filterIndex) const {
	return resamplerDelay(filterIndex) + filterDelay(filterIndex);
    }
};

PitchFilterbank::PitchFilterbank(int sampleRate, float tuningFrequency) :
    m_d(new D(sampleRate, tuningFrequency))
{
}

PitchFilterbank::~PitchFilterbank()
{
    delete m_d;
}

void
PitchFilterbank::reset()
{
    int rate = m_d->getSampleRate();
    float freq = m_d->getTuningFrequency();
    delete m_d;
    m_d = new D(rate, freq);
}

PitchFilterbank::RealBlock
PitchFilterbank::process(const RealSequence &in)
{
    return m_d->process(in);
}

PitchFilterbank::RealBlock
PitchFilterbank::getRemainingOutput()
{
    return m_d->getRemainingOutput();
}