Chris@152: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@152: Chris@152: /* Chris@152: Sonic Visualiser Chris@152: An audio file viewer and annotation editor. Chris@152: Centre for Digital Music, Queen Mary, University of London. Chris@152: This file copyright 2006 Chris Cannam. Chris@152: Chris@152: This program is free software; you can redistribute it and/or Chris@152: modify it under the terms of the GNU General Public License as Chris@152: published by the Free Software Foundation; either version 2 of the Chris@152: License, or (at your option) any later version. See the file Chris@152: COPYING included with this distribution for more information. Chris@152: */ Chris@152: Chris@152: #include "FFTModel.h" Chris@152: #include "DenseTimeValueModel.h" Chris@152: Chris@183: #include "base/Profiler.h" Chris@275: #include "base/Pitch.h" Chris@183: Chris@402: #include Chris@402: Chris@152: #include Chris@1090: #include Chris@152: Chris@608: #ifndef __GNUC__ Chris@608: #include Chris@608: #endif Chris@608: Chris@1090: using namespace std; Chris@1090: Chris@152: FFTModel::FFTModel(const DenseTimeValueModel *model, Chris@152: int channel, Chris@152: WindowType windowType, Chris@929: int windowSize, Chris@929: int windowIncrement, Chris@1090: int fftSize) : Chris@1090: m_model(model), Chris@1090: m_channel(channel), Chris@1090: m_windowType(windowType), Chris@1090: m_windowSize(windowSize), Chris@1090: m_windowIncrement(windowIncrement), Chris@1090: m_fftSize(fftSize), Chris@1090: m_windower(windowType, windowSize) Chris@152: { Chris@152: } Chris@152: Chris@152: FFTModel::~FFTModel() Chris@152: { Chris@152: } Chris@152: Chris@360: void Chris@360: FFTModel::sourceModelAboutToBeDeleted() Chris@360: { Chris@1090: if (m_model) { Chris@1090: cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl; Chris@1090: m_model = 0; Chris@360: } Chris@360: } Chris@360: Chris@152: QString Chris@929: FFTModel::getBinName(int n) const Chris@152: { Chris@1040: sv_samplerate_t sr = getSampleRate(); Chris@152: if (!sr) return ""; Chris@204: QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2)); Chris@152: return name; Chris@152: } Chris@152: Chris@275: bool Chris@1045: FFTModel::estimateStableFrequency(int x, int y, double &frequency) Chris@275: { Chris@275: if (!isOK()) return false; Chris@275: Chris@1090: frequency = double(y * getSampleRate()) / m_fftSize; Chris@275: Chris@275: if (x+1 >= getWidth()) return false; Chris@275: Chris@275: // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec. Chris@275: // At hopsize h and sample rate sr, one hop happens in h/sr sec. Chris@275: // At window size w, for bin b, f is b*sr/w. Chris@275: // thus 2pi phase shift happens in w/(b*sr) sec. Chris@275: // We need to know what phase shift we expect from h/sr sec. Chris@275: // -> 2pi * ((h/sr) / (w/(b*sr))) Chris@275: // = 2pi * ((h * b * sr) / (w * sr)) Chris@275: // = 2pi * (h * b) / w. Chris@275: Chris@1038: double oldPhase = getPhaseAt(x, y); Chris@1038: double newPhase = getPhaseAt(x+1, y); Chris@275: Chris@929: int incr = getResolution(); Chris@275: Chris@1090: double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize; Chris@275: Chris@1038: double phaseError = princarg(newPhase - expectedPhase); Chris@275: Chris@275: // The new frequency estimate based on the phase error resulting Chris@275: // from assuming the "native" frequency of this bin Chris@275: Chris@275: frequency = Chris@1090: (getSampleRate() * (expectedPhase + phaseError - oldPhase)) / Chris@1045: (2.0 * M_PI * incr); Chris@275: Chris@275: return true; Chris@275: } Chris@275: Chris@275: FFTModel::PeakLocationSet Chris@929: FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax) Chris@275: { Chris@551: Profiler profiler("FFTModel::getPeaks"); Chris@551: Chris@275: FFTModel::PeakLocationSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: Chris@275: if (ymax == 0 || ymax > getHeight() - 1) { Chris@275: ymax = getHeight() - 1; Chris@275: } Chris@275: Chris@275: if (type == AllPeaks) { Chris@551: int minbin = ymin; Chris@551: if (minbin > 0) minbin = minbin - 1; Chris@551: int maxbin = ymax; Chris@551: if (maxbin < getHeight() - 1) maxbin = maxbin + 1; Chris@551: const int n = maxbin - minbin + 1; Chris@608: #ifdef __GNUC__ Chris@551: float values[n]; Chris@608: #else Chris@608: float *values = (float *)alloca(n * sizeof(float)); Chris@608: #endif Chris@551: getMagnitudesAt(x, values, minbin, maxbin - minbin + 1); Chris@929: for (int bin = ymin; bin <= ymax; ++bin) { Chris@551: if (bin == minbin || bin == maxbin) continue; Chris@551: if (values[bin - minbin] > values[bin - minbin - 1] && Chris@551: values[bin - minbin] > values[bin - minbin + 1]) { Chris@275: peaks.insert(bin); Chris@275: } Chris@275: } Chris@275: return peaks; Chris@275: } Chris@275: Chris@551: Column values = getColumn(x); Chris@275: Chris@500: float mean = 0.f; Chris@551: for (int i = 0; i < values.size(); ++i) mean += values[i]; Chris@1038: if (values.size() > 0) mean = mean / float(values.size()); Chris@1038: Chris@275: // For peak picking we use a moving median window, picking the Chris@275: // highest value within each continuous region of values that Chris@275: // exceed the median. For pitch adaptivity, we adjust the window Chris@275: // size to a roughly constant pitch range (about four tones). Chris@275: Chris@1040: sv_samplerate_t sampleRate = getSampleRate(); Chris@275: Chris@1090: deque window; Chris@1090: vector inrange; Chris@280: float dist = 0.5; Chris@500: Chris@929: int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist); Chris@929: int halfWin = medianWinSize/2; Chris@275: Chris@929: int binmin; Chris@275: if (ymin > halfWin) binmin = ymin - halfWin; Chris@275: else binmin = 0; Chris@275: Chris@929: int binmax; Chris@275: if (ymax + halfWin < values.size()) binmax = ymax + halfWin; Chris@275: else binmax = values.size()-1; Chris@275: Chris@929: int prevcentre = 0; Chris@500: Chris@929: for (int bin = binmin; bin <= binmax; ++bin) { Chris@275: Chris@275: float value = values[bin]; Chris@275: Chris@275: window.push_back(value); Chris@275: Chris@280: // so-called median will actually be the dist*100'th percentile Chris@280: medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist); Chris@275: halfWin = medianWinSize/2; Chris@275: Chris@929: while ((int)window.size() > medianWinSize) { Chris@500: window.pop_front(); Chris@500: } Chris@500: Chris@1038: int actualSize = int(window.size()); Chris@275: Chris@275: if (type == MajorPitchAdaptivePeaks) { Chris@275: if (ymax + halfWin < values.size()) binmax = ymax + halfWin; Chris@275: else binmax = values.size()-1; Chris@275: } Chris@275: Chris@1090: deque sorted(window); Chris@1090: sort(sorted.begin(), sorted.end()); Chris@1038: float median = sorted[int(float(sorted.size()) * dist)]; Chris@275: Chris@929: int centrebin = 0; Chris@500: if (bin > actualSize/2) centrebin = bin - actualSize/2; Chris@500: Chris@500: while (centrebin > prevcentre || bin == binmin) { Chris@275: Chris@500: if (centrebin > prevcentre) ++prevcentre; Chris@500: Chris@500: float centre = values[prevcentre]; Chris@500: Chris@500: if (centre > median) { Chris@500: inrange.push_back(centrebin); Chris@500: } Chris@500: Chris@500: if (centre <= median || centrebin+1 == values.size()) { Chris@500: if (!inrange.empty()) { Chris@929: int peakbin = 0; Chris@500: float peakval = 0.f; Chris@929: for (int i = 0; i < (int)inrange.size(); ++i) { Chris@500: if (i == 0 || values[inrange[i]] > peakval) { Chris@500: peakval = values[inrange[i]]; Chris@500: peakbin = inrange[i]; Chris@500: } Chris@500: } Chris@500: inrange.clear(); Chris@500: if (peakbin >= ymin && peakbin <= ymax) { Chris@500: peaks.insert(peakbin); Chris@275: } Chris@275: } Chris@275: } Chris@500: Chris@500: if (bin == binmin) break; Chris@275: } Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: Chris@929: int Chris@1040: FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate, Chris@929: int bin, float &percentile) const Chris@275: { Chris@280: percentile = 0.5; Chris@275: if (type == MajorPeaks) return 10; Chris@275: if (bin == 0) return 3; Chris@280: Chris@1090: double binfreq = (getSampleRate() * bin) / m_fftSize; Chris@1038: double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq); Chris@280: Chris@1090: int hibin = int(lrint((hifreq * m_fftSize) / getSampleRate())); Chris@275: int medianWinSize = hibin - bin; Chris@275: if (medianWinSize < 3) medianWinSize = 3; Chris@280: Chris@1090: percentile = 0.5f + float(binfreq / getSampleRate()); Chris@280: Chris@275: return medianWinSize; Chris@275: } Chris@275: Chris@275: FFTModel::PeakSet Chris@929: FFTModel::getPeakFrequencies(PeakPickType type, int x, Chris@929: int ymin, int ymax) Chris@275: { Chris@551: Profiler profiler("FFTModel::getPeakFrequencies"); Chris@551: Chris@275: PeakSet peaks; Chris@275: if (!isOK()) return peaks; Chris@275: PeakLocationSet locations = getPeaks(type, x, ymin, ymax); Chris@275: Chris@1040: sv_samplerate_t sampleRate = getSampleRate(); Chris@929: int incr = getResolution(); Chris@275: Chris@275: // This duplicates some of the work of estimateStableFrequency to Chris@275: // allow us to retrieve the phases in two separate vertical Chris@275: // columns, instead of jumping back and forth between columns x and Chris@275: // x+1, which may be significantly slower if re-seeking is needed Chris@275: Chris@1090: vector phases; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@275: phases.push_back(getPhaseAt(x, *i)); Chris@275: } Chris@275: Chris@929: int phaseIndex = 0; Chris@275: for (PeakLocationSet::iterator i = locations.begin(); Chris@275: i != locations.end(); ++i) { Chris@1038: double oldPhase = phases[phaseIndex]; Chris@1038: double newPhase = getPhaseAt(x+1, *i); Chris@1090: double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize; Chris@1038: double phaseError = princarg(newPhase - expectedPhase); Chris@1038: double frequency = Chris@275: (sampleRate * (expectedPhase + phaseError - oldPhase)) Chris@275: / (2 * M_PI * incr); Chris@1045: peaks[*i] = frequency; Chris@275: ++phaseIndex; Chris@275: } Chris@275: Chris@275: return peaks; Chris@275: } Chris@275: