Chris@152: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@152: Chris@152: /* Chris@152: Sonic Visualiser Chris@152: An audio file viewer and annotation editor. Chris@152: Centre for Digital Music, Queen Mary, University of London. Chris@152: This file copyright 2006 Chris Cannam. Chris@152: Chris@152: This program is free software; you can redistribute it and/or Chris@152: modify it under the terms of the GNU General Public License as Chris@152: published by the Free Software Foundation; either version 2 of the Chris@152: License, or (at your option) any later version. See the file Chris@152: COPYING included with this distribution for more information. Chris@152: */ Chris@152: Chris@152: #include "FFTModel.h" Chris@152: #include "DenseTimeValueModel.h" Chris@152: Chris@183: #include "base/Profiler.h" Chris@275: #include "base/Pitch.h" Chris@1256: #include "base/HitCount.h" Chris@1428: #include "base/Debug.h" Chris@1573: #include "base/MovingMedian.h" Chris@183: Chris@402: #include Chris@402: Chris@152: #include Chris@1090: #include Chris@152: Chris@1090: using namespace std; Chris@1090: Chris@1256: static HitCount inSmallCache("FFTModel: Small FFT cache"); Chris@1256: static HitCount inSourceCache("FFTModel: Source data cache"); Chris@1256: Chris@152: FFTModel::FFTModel(const DenseTimeValueModel *model, Chris@152: int channel, Chris@152: WindowType windowType, Chris@929: int windowSize, Chris@929: int windowIncrement, Chris@1090: int fftSize) : Chris@1090: m_model(model), Chris@1090: m_channel(channel), Chris@1090: m_windowType(windowType), Chris@1090: m_windowSize(windowSize), Chris@1090: m_windowIncrement(windowIncrement), Chris@1090: m_fftSize(fftSize), Chris@1091: m_windower(windowType, windowSize), Chris@1093: m_fft(fftSize), Chris@1371: m_cacheWriteIndex(0), Chris@1093: m_cacheSize(3) Chris@152: { Chris@1371: while (m_cached.size() < m_cacheSize) { Chris@1371: m_cached.push_back({ -1, cvec(m_fftSize / 2 + 1) }); Chris@1371: } Chris@1371: Chris@1091: if (m_windowSize > m_fftSize) { Chris@1428: SVCERR << "ERROR: FFTModel::FFTModel: window size (" << m_windowSize Chris@1428: << ") must be at least FFT size (" << m_fftSize << ")" << endl; Chris@1091: throw invalid_argument("FFTModel window size must be at least FFT size"); Chris@1091: } Chris@1133: Chris@1270: m_fft.initFloat(); Chris@1270: Chris@1133: connect(model, SIGNAL(modelChanged()), this, SIGNAL(modelChanged())); Chris@1133: connect(model, SIGNAL(modelChangedWithin(sv_frame_t, sv_frame_t)), Chris@1133: this, SIGNAL(modelChangedWithin(sv_frame_t, sv_frame_t))); Chris@152: } Chris@152: Chris@152: FFTModel::~FFTModel() Chris@152: { Chris@152: } Chris@152: Chris@360: void Chris@360: FFTModel::sourceModelAboutToBeDeleted() Chris@360: { Chris@1090: if (m_model) { Chris@1428: SVDEBUG << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl; Chris@1582: m_model = nullptr; Chris@360: } Chris@360: } Chris@360: Chris@1091: int Chris@1091: FFTModel::getWidth() const Chris@1091: { Chris@1091: if (!m_model) return 0; Chris@1091: return int((m_model->getEndFrame() - m_model->getStartFrame()) Chris@1091: / m_windowIncrement) + 1; Chris@1091: } Chris@1091: Chris@1091: int Chris@1091: FFTModel::getHeight() const Chris@1091: { Chris@1091: return m_fftSize / 2 + 1; Chris@1091: } Chris@1091: Chris@152: QString Chris@929: FFTModel::getBinName(int n) const Chris@152: { Chris@1040: sv_samplerate_t sr = getSampleRate(); Chris@152: if (!sr) return ""; Chris@204: QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); Chris@152: return name; Chris@152: } Chris@152: Chris@1091: FFTModel::Column Chris@1091: FFTModel::getColumn(int x) const Chris@1091: { Chris@1091: auto cplx = getFFTColumn(x); Chris@1091: Column col; Chris@1154: col.reserve(cplx.size()); Chris@1091: for (auto c: cplx) col.push_back(abs(c)); Chris@1319: return col; Chris@1091: } Chris@1091: Chris@1200: FFTModel::Column Chris@1200: FFTModel::getPhases(int x) const Chris@1200: { Chris@1200: auto cplx = getFFTColumn(x); Chris@1200: Column col; Chris@1200: col.reserve(cplx.size()); Chris@1201: for (auto c: cplx) { Chris@1201: col.push_back(arg(c)); Chris@1201: } Chris@1319: return col; Chris@1200: } Chris@1200: Chris@1091: float Chris@1091: FFTModel::getMagnitudeAt(int x, int y) const Chris@1091: { Chris@1569: if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) { Chris@1569: return 0.f; Chris@1569: } Chris@1093: auto col = getFFTColumn(x); Chris@1093: return abs(col[y]); Chris@1091: } Chris@1091: Chris@1091: float Chris@1091: FFTModel::getMaximumMagnitudeAt(int x) const Chris@1091: { Chris@1091: Column col(getColumn(x)); Chris@1092: float max = 0.f; Chris@1154: int n = int(col.size()); Chris@1154: for (int i = 0; i < n; ++i) { Chris@1092: if (col[i] > max) max = col[i]; Chris@1092: } Chris@1092: return max; Chris@1091: } Chris@1091: Chris@1091: float Chris@1091: FFTModel::getPhaseAt(int x, int y) const Chris@1091: { Chris@1093: if (x < 0 || x >= getWidth() || y < 0 || y >= getHeight()) return 0.f; Chris@1091: return arg(getFFTColumn(x)[y]); Chris@1091: } Chris@1091: Chris@1091: void Chris@1091: FFTModel::getValuesAt(int x, int y, float &re, float &im) const Chris@1091: { Chris@1091: auto col = getFFTColumn(x); Chris@1091: re = col[y].real(); Chris@1091: im = col[y].imag(); Chris@1091: } Chris@1091: Chris@1091: bool Chris@1091: FFTModel::getMagnitudesAt(int x, float *values, int minbin, int count) const Chris@1091: { Chris@1091: if (count == 0) count = getHeight(); Chris@1091: auto col = getFFTColumn(x); Chris@1091: for (int i = 0; i < count; ++i) { Chris@1091: values[i] = abs(col[minbin + i]); Chris@1091: } Chris@1091: return true; Chris@1091: } Chris@1091: Chris@1091: bool Chris@1091: FFTModel::getPhasesAt(int x, float *values, int minbin, int count) const Chris@1091: { Chris@1091: if (count == 0) count = getHeight(); Chris@1091: auto col = getFFTColumn(x); Chris@1091: for (int i = 0; i < count; ++i) { Chris@1091: values[i] = arg(col[minbin + i]); Chris@1091: } Chris@1091: return true; Chris@1091: } Chris@1091: Chris@1091: bool Chris@1091: FFTModel::getValuesAt(int x, float *reals, float *imags, int minbin, int count) const Chris@1091: { Chris@1091: if (count == 0) count = getHeight(); Chris@1091: auto col = getFFTColumn(x); Chris@1091: for (int i = 0; i < count; ++i) { Chris@1091: reals[i] = col[minbin + i].real(); Chris@1091: } Chris@1091: for (int i = 0; i < count; ++i) { Chris@1091: imags[i] = col[minbin + i].imag(); Chris@1091: } Chris@1091: return true; Chris@1091: } Chris@1091: Chris@1326: FFTModel::fvec Chris@1091: FFTModel::getSourceSamples(int column) const Chris@1091: { Chris@1094: // m_fftSize may be greater than m_windowSize, but not the reverse Chris@1094: Chris@1094: // cerr << "getSourceSamples(" << column << ")" << endl; Chris@1094: Chris@1091: auto range = getSourceSampleRange(column); Chris@1094: auto data = getSourceData(range); Chris@1094: Chris@1091: int off = (m_fftSize - m_windowSize) / 2; Chris@1094: Chris@1094: if (off == 0) { Chris@1094: return data; Chris@1094: } else { Chris@1094: vector pad(off, 0.f); Chris@1326: fvec padded; Chris@1094: padded.reserve(m_fftSize); Chris@1094: padded.insert(padded.end(), pad.begin(), pad.end()); Chris@1094: padded.insert(padded.end(), data.begin(), data.end()); Chris@1094: padded.insert(padded.end(), pad.begin(), pad.end()); Chris@1094: return padded; Chris@1094: } Chris@1094: } Chris@1094: Chris@1326: FFTModel::fvec Chris@1094: FFTModel::getSourceData(pair range) const Chris@1094: { Chris@1094: // cerr << "getSourceData(" << range.first << "," << range.second Chris@1094: // << "): saved range is (" << m_savedData.range.first Chris@1094: // << "," << m_savedData.range.second << ")" << endl; Chris@1094: Chris@1100: if (m_savedData.range == range) { Chris@1256: inSourceCache.hit(); Chris@1100: return m_savedData.data; Chris@1100: } Chris@1094: Chris@1270: Profiler profiler("FFTModel::getSourceData (cache miss)"); Chris@1270: Chris@1094: if (range.first < m_savedData.range.second && Chris@1094: range.first >= m_savedData.range.first && Chris@1094: range.second > m_savedData.range.second) { Chris@1094: Chris@1256: inSourceCache.partial(); Chris@1256: Chris@1100: sv_frame_t discard = range.first - m_savedData.range.first; Chris@1100: Chris@1457: fvec data; Chris@1457: data.reserve(range.second - range.first); Chris@1094: Chris@1457: data.insert(data.end(), Chris@1457: m_savedData.data.begin() + discard, Chris@1457: m_savedData.data.end()); Chris@1100: Chris@1457: fvec rest = getSourceDataUncached Chris@1457: ({ m_savedData.range.second, range.second }); Chris@1457: Chris@1457: data.insert(data.end(), rest.begin(), rest.end()); Chris@1094: Chris@1457: m_savedData = { range, data }; Chris@1457: return data; Chris@1095: Chris@1095: } else { Chris@1095: Chris@1256: inSourceCache.miss(); Chris@1256: Chris@1095: auto data = getSourceDataUncached(range); Chris@1095: m_savedData = { range, data }; Chris@1095: return data; Chris@1094: } Chris@1095: } Chris@1094: Chris@1326: FFTModel::fvec Chris@1095: FFTModel::getSourceDataUncached(pair range) const Chris@1095: { Chris@1457: Profiler profiler("FFTModel::getSourceDataUncached"); Chris@1457: Chris@1091: decltype(range.first) pfx = 0; Chris@1091: if (range.first < 0) { Chris@1091: pfx = -range.first; Chris@1091: range = { 0, range.second }; Chris@1091: } Chris@1096: Chris@1096: auto data = m_model->getData(m_channel, Chris@1096: range.first, Chris@1096: range.second - range.first); Chris@1096: Chris@1281: if (data.empty()) { Chris@1281: SVDEBUG << "NOTE: empty source data for range (" << range.first << "," Chris@1281: << range.second << ") (model end frame " Chris@1281: << m_model->getEndFrame() << ")" << endl; Chris@1281: } Chris@1281: Chris@1096: // don't return a partial frame Chris@1096: data.resize(range.second - range.first, 0.f); Chris@1096: Chris@1096: if (pfx > 0) { Chris@1096: vector pad(pfx, 0.f); Chris@1096: data.insert(data.begin(), pad.begin(), pad.end()); Chris@1096: } Chris@1096: Chris@1091: if (m_channel == -1) { Chris@1429: int channels = m_model->getChannelCount(); Chris@1429: if (channels > 1) { Chris@1096: int n = int(data.size()); Chris@1096: float factor = 1.f / float(channels); Chris@1100: // use mean instead of sum for fft model input Chris@1429: for (int i = 0; i < n; ++i) { Chris@1429: data[i] *= factor; Chris@1429: } Chris@1429: } Chris@1091: } Chris@1094: Chris@1094: return data; Chris@1091: } Chris@1091: Chris@1371: const FFTModel::cvec & Chris@1093: FFTModel::getFFTColumn(int n) const Chris@1091: { Chris@1258: // The small cache (i.e. the m_cached deque) is for cases where Chris@1258: // values are looked up individually, and for e.g. peak-frequency Chris@1258: // spectrograms where values from two consecutive columns are Chris@1257: // needed at once. This cache gets essentially no hits when Chris@1258: // scrolling through a magnitude spectrogram, but 95%+ hits with a Chris@1569: // peak-frequency spectrogram or spectrum. Chris@1257: for (const auto &incache : m_cached) { Chris@1093: if (incache.n == n) { Chris@1256: inSmallCache.hit(); Chris@1093: return incache.col; Chris@1093: } Chris@1093: } Chris@1256: inSmallCache.miss(); Chris@1258: Chris@1258: Profiler profiler("FFTModel::getFFTColumn (cache miss)"); Chris@1093: Chris@1093: auto samples = getSourceSamples(n); Chris@1567: m_windower.cut(samples.data() + (m_fftSize - m_windowSize) / 2); Chris@1270: breakfastquay::v_fftshift(samples.data(), m_fftSize); Chris@1270: Chris@1371: cvec &col = m_cached[m_cacheWriteIndex].col; Chris@1270: Chris@1270: m_fft.forwardInterleaved(samples.data(), Chris@1270: reinterpret_cast(col.data())); Chris@1093: Chris@1371: m_cached[m_cacheWriteIndex].n = n; Chris@1371: Chris@1371: m_cacheWriteIndex = (m_cacheWriteIndex + 1) % m_cacheSize; Chris@1093: Chris@1319: return col; Chris@1091: } Chris@1091: Chris@275: bool Chris@1045: FFTModel::estimateStableFrequency(int x, int y, double &frequency) Chris@275: { Chris@275: if (!isOK()) return false; Chris@275: Chris@1090: frequency = double(y * getSampleRate()) / m_fftSize; Chris@275: Chris@275: if (x+1 >= getWidth()) return false; Chris@275: Chris@275: // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. Chris@275: // At hopsize h and sample rate sr, one hop happens in h/sr sec. Chris@275: // At window size w, for bin b, f is b*sr/w. Chris@275: // thus 2pi phase shift happens in w/(b*sr) sec. Chris@275: // We need to know what phase shift we expect from h/sr sec. Chris@275: // -> 2pi * ((h/sr) / (w/(b*sr))) Chris@275: // = 2pi * ((h * b * sr) / (w * sr)) Chris@275: // = 2pi * (h * b) / w. Chris@275: Chris@1038: double oldPhase = getPhaseAt(x, y); Chris@1038: double newPhase = getPhaseAt(x+1, y); Chris@275: Chris@929: int incr = getResolution(); Chris@275: Chris@1090: double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize; Chris@275: Chris@1038: double phaseError = princarg(newPhase - expectedPhase); Chris@275: Chris@275: // The new frequency estimate based on the phase error resulting Chris@275: // from assuming the "native" frequency of this bin Chris@275: Chris@275: frequency = Chris@1090: (getSampleRate() * (expectedPhase + phaseError - oldPhase)) / Chris@1045: (2.0 * M_PI * incr); Chris@275: Chris@275: return true; Chris@275: } Chris@275: Chris@275: FFTModel::PeakLocationSet Chris@1191: FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax) const Chris@275: { Chris@551: Profiler profiler("FFTModel::getPeaks"); Chris@1575: Chris@275: FFTModel::PeakLocationSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: Chris@275: if (ymax == 0 || ymax > getHeight() - 1) { Chris@275: ymax = getHeight() - 1; Chris@275: } Chris@275: Chris@275: if (type == AllPeaks) { Chris@551: int minbin = ymin; Chris@551: if (minbin > 0) minbin = minbin - 1; Chris@551: int maxbin = ymax; Chris@551: if (maxbin < getHeight() - 1) maxbin = maxbin + 1; Chris@551: const int n = maxbin - minbin + 1; Chris@1218: float *values = new float[n]; Chris@551: getMagnitudesAt(x, values, minbin, maxbin - minbin + 1); Chris@929: for (int bin = ymin; bin <= ymax; ++bin) { Chris@551: if (bin == minbin || bin == maxbin) continue; Chris@551: if (values[bin - minbin] > values[bin - minbin - 1] && Chris@551: values[bin - minbin] > values[bin - minbin + 1]) { Chris@275: peaks.insert(bin); Chris@275: } Chris@275: } Chris@1218: delete[] values; Chris@275: return peaks; Chris@275: } Chris@275: Chris@551: Column values = getColumn(x); Chris@1154: int nv = int(values.size()); Chris@275: Chris@500: float mean = 0.f; Chris@1154: for (int i = 0; i < nv; ++i) mean += values[i]; Chris@1154: if (nv > 0) mean = mean / float(values.size()); Chris@1038: Chris@275: // For peak picking we use a moving median window, picking the Chris@275: // highest value within each continuous region of values that Chris@275: // exceed the median. For pitch adaptivity, we adjust the window Chris@275: // size to a roughly constant pitch range (about four tones). Chris@275: Chris@1040: sv_samplerate_t sampleRate = getSampleRate(); Chris@275: Chris@1090: vector inrange; Chris@1576: double dist = 0.5; Chris@500: Chris@929: int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist); Chris@929: int halfWin = medianWinSize/2; Chris@275: Chris@1573: MovingMedian window(medianWinSize); Chris@1573: Chris@929: int binmin; Chris@275: if (ymin > halfWin) binmin = ymin - halfWin; Chris@275: else binmin = 0; Chris@275: Chris@929: int binmax; Chris@1154: if (ymax + halfWin < nv) binmax = ymax + halfWin; Chris@1154: else binmax = nv - 1; Chris@275: Chris@929: int prevcentre = 0; Chris@500: Chris@929: for (int bin = binmin; bin <= binmax; ++bin) { Chris@275: Chris@275: float value = values[bin]; Chris@275: Chris@280: // so-called median will actually be the dist*100'th percentile Chris@280: medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist); Chris@275: halfWin = medianWinSize/2; Chris@275: Chris@1573: int actualSize = std::min(medianWinSize, bin - binmin + 1); Chris@1573: window.resize(actualSize); Chris@1573: window.setPercentile(dist * 100.0); Chris@1573: window.push(value); Chris@275: Chris@275: if (type == MajorPitchAdaptivePeaks) { Chris@1154: if (ymax + halfWin < nv) binmax = ymax + halfWin; Chris@1154: else binmax = nv - 1; Chris@275: } Chris@275: Chris@1573: float median = window.get(); Chris@275: Chris@929: int centrebin = 0; Chris@500: if (bin > actualSize/2) centrebin = bin - actualSize/2; Chris@500: Chris@500: while (centrebin > prevcentre || bin == binmin) { Chris@275: Chris@500: if (centrebin > prevcentre) ++prevcentre; Chris@500: Chris@500: float centre = values[prevcentre]; Chris@500: Chris@500: if (centre > median) { Chris@500: inrange.push_back(centrebin); Chris@500: } Chris@500: Chris@1154: if (centre <= median || centrebin+1 == nv) { Chris@500: if (!inrange.empty()) { Chris@929: int peakbin = 0; Chris@500: float peakval = 0.f; Chris@929: for (int i = 0; i < (int)inrange.size(); ++i) { Chris@500: if (i == 0 || values[inrange[i]] > peakval) { Chris@500: peakval = values[inrange[i]]; Chris@500: peakbin = inrange[i]; Chris@500: } Chris@500: } Chris@500: inrange.clear(); Chris@500: if (peakbin >= ymin && peakbin <= ymax) { Chris@500: peaks.insert(peakbin); Chris@275: } Chris@275: } Chris@275: } Chris@500: Chris@500: if (bin == binmin) break; Chris@275: } Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: Chris@929: int Chris@1040: FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate, Chris@1576: int bin, double &dist) const Chris@275: { Chris@1576: dist = 0.5; // dist is percentile / 100.0 Chris@275: if (type == MajorPeaks) return 10; Chris@275: if (bin == 0) return 3; Chris@280: Chris@1091: double binfreq = (sampleRate * bin) / m_fftSize; Chris@1038: double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); Chris@280: Chris@1091: int hibin = int(lrint((hifreq * m_fftSize) / sampleRate)); Chris@275: int medianWinSize = hibin - bin; Chris@1576: Chris@1575: if (medianWinSize < 3) { Chris@1575: medianWinSize = 3; Chris@1575: } Chris@1576: Chris@1576: // We want to avoid the median window size changing too often, as Chris@1576: // it requires a reallocation. So snap to a nearby round number. Chris@1576: Chris@1575: if (medianWinSize > 20) { Chris@1575: medianWinSize = (1 + medianWinSize / 10) * 10; Chris@1575: } Chris@1576: if (medianWinSize > 200) { Chris@1576: medianWinSize = (1 + medianWinSize / 100) * 100; Chris@1576: } Chris@1576: if (medianWinSize > 2000) { Chris@1576: medianWinSize = (1 + medianWinSize / 1000) * 1000; Chris@1576: } Chris@1576: if (medianWinSize > 20000) { Chris@1576: medianWinSize = 20000; Chris@1575: } Chris@280: Chris@1576: if (medianWinSize < 100) { Chris@1576: dist = 1.0 - (4.0 / medianWinSize); Chris@1576: } else { Chris@1576: dist = 1.0 - (8.0 / medianWinSize); Chris@1576: } Chris@1576: if (dist < 0.5) dist = 0.5; Chris@1575: Chris@275: return medianWinSize; Chris@275: } Chris@275: Chris@275: FFTModel::PeakSet Chris@929: FFTModel::getPeakFrequencies(PeakPickType type, int x, Chris@1191: int ymin, int ymax) const Chris@275: { Chris@551: Profiler profiler("FFTModel::getPeakFrequencies"); Chris@551: Chris@275: PeakSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: PeakLocationSet locations = getPeaks(type, x, ymin, ymax); Chris@275: Chris@1040: sv_samplerate_t sampleRate = getSampleRate(); Chris@929: int incr = getResolution(); Chris@275: Chris@275: // This duplicates some of the work of estimateStableFrequency to Chris@275: // allow us to retrieve the phases in two separate vertical Chris@275: // columns, instead of jumping back and forth between columns x and Chris@275: // x+1, which may be significantly slower if re-seeking is needed Chris@275: Chris@1090: vector phases; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@275: phases.push_back(getPhaseAt(x, *i)); Chris@275: } Chris@275: Chris@929: int phaseIndex = 0; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@1038: double oldPhase = phases[phaseIndex]; Chris@1038: double newPhase = getPhaseAt(x+1, *i); Chris@1090: double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize; Chris@1038: double phaseError = princarg(newPhase - expectedPhase); Chris@1038: double frequency = Chris@275: (sampleRate * (expectedPhase + phaseError - oldPhase)) Chris@275: / (2 * M_PI * incr); Chris@1045: peaks[*i] = frequency; Chris@275: ++phaseIndex; Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: