Chris@152
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@152
|
2
|
Chris@152
|
3 /*
|
Chris@152
|
4 Sonic Visualiser
|
Chris@152
|
5 An audio file viewer and annotation editor.
|
Chris@152
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@152
|
7 This file copyright 2006 Chris Cannam.
|
Chris@152
|
8
|
Chris@152
|
9 This program is free software; you can redistribute it and/or
|
Chris@152
|
10 modify it under the terms of the GNU General Public License as
|
Chris@152
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@152
|
12 License, or (at your option) any later version. See the file
|
Chris@152
|
13 COPYING included with this distribution for more information.
|
Chris@152
|
14 */
|
Chris@152
|
15
|
Chris@152
|
16 #include "FFTModel.h"
|
Chris@152
|
17 #include "DenseTimeValueModel.h"
|
Chris@152
|
18
|
Chris@183
|
19 #include "base/Profiler.h"
|
Chris@275
|
20 #include "base/Pitch.h"
|
Chris@183
|
21
|
Chris@402
|
22 #include <algorithm>
|
Chris@402
|
23
|
Chris@152
|
24 #include <cassert>
|
Chris@1090
|
25 #include <deque>
|
Chris@152
|
26
|
Chris@608
|
27 #ifndef __GNUC__
|
Chris@608
|
28 #include <alloca.h>
|
Chris@608
|
29 #endif
|
Chris@608
|
30
|
Chris@1090
|
31 using namespace std;
|
Chris@1090
|
32
|
Chris@152
|
33 FFTModel::FFTModel(const DenseTimeValueModel *model,
|
Chris@152
|
34 int channel,
|
Chris@152
|
35 WindowType windowType,
|
Chris@929
|
36 int windowSize,
|
Chris@929
|
37 int windowIncrement,
|
Chris@1090
|
38 int fftSize) :
|
Chris@1090
|
39 m_model(model),
|
Chris@1090
|
40 m_channel(channel),
|
Chris@1090
|
41 m_windowType(windowType),
|
Chris@1090
|
42 m_windowSize(windowSize),
|
Chris@1090
|
43 m_windowIncrement(windowIncrement),
|
Chris@1090
|
44 m_fftSize(fftSize),
|
Chris@1090
|
45 m_windower(windowType, windowSize)
|
Chris@152
|
46 {
|
Chris@152
|
47 }
|
Chris@152
|
48
|
Chris@152
|
49 FFTModel::~FFTModel()
|
Chris@152
|
50 {
|
Chris@152
|
51 }
|
Chris@152
|
52
|
Chris@360
|
53 void
|
Chris@360
|
54 FFTModel::sourceModelAboutToBeDeleted()
|
Chris@360
|
55 {
|
Chris@1090
|
56 if (m_model) {
|
Chris@1090
|
57 cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_model << ")" << endl;
|
Chris@1090
|
58 m_model = 0;
|
Chris@360
|
59 }
|
Chris@360
|
60 }
|
Chris@360
|
61
|
Chris@152
|
62 QString
|
Chris@929
|
63 FFTModel::getBinName(int n) const
|
Chris@152
|
64 {
|
Chris@1040
|
65 sv_samplerate_t sr = getSampleRate();
|
Chris@152
|
66 if (!sr) return "";
|
Chris@204
|
67 QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
|
Chris@152
|
68 return name;
|
Chris@152
|
69 }
|
Chris@152
|
70
|
Chris@275
|
71 bool
|
Chris@1045
|
72 FFTModel::estimateStableFrequency(int x, int y, double &frequency)
|
Chris@275
|
73 {
|
Chris@275
|
74 if (!isOK()) return false;
|
Chris@275
|
75
|
Chris@1090
|
76 frequency = double(y * getSampleRate()) / m_fftSize;
|
Chris@275
|
77
|
Chris@275
|
78 if (x+1 >= getWidth()) return false;
|
Chris@275
|
79
|
Chris@275
|
80 // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
|
Chris@275
|
81 // At hopsize h and sample rate sr, one hop happens in h/sr sec.
|
Chris@275
|
82 // At window size w, for bin b, f is b*sr/w.
|
Chris@275
|
83 // thus 2pi phase shift happens in w/(b*sr) sec.
|
Chris@275
|
84 // We need to know what phase shift we expect from h/sr sec.
|
Chris@275
|
85 // -> 2pi * ((h/sr) / (w/(b*sr)))
|
Chris@275
|
86 // = 2pi * ((h * b * sr) / (w * sr))
|
Chris@275
|
87 // = 2pi * (h * b) / w.
|
Chris@275
|
88
|
Chris@1038
|
89 double oldPhase = getPhaseAt(x, y);
|
Chris@1038
|
90 double newPhase = getPhaseAt(x+1, y);
|
Chris@275
|
91
|
Chris@929
|
92 int incr = getResolution();
|
Chris@275
|
93
|
Chris@1090
|
94 double expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / m_fftSize;
|
Chris@275
|
95
|
Chris@1038
|
96 double phaseError = princarg(newPhase - expectedPhase);
|
Chris@275
|
97
|
Chris@275
|
98 // The new frequency estimate based on the phase error resulting
|
Chris@275
|
99 // from assuming the "native" frequency of this bin
|
Chris@275
|
100
|
Chris@275
|
101 frequency =
|
Chris@1090
|
102 (getSampleRate() * (expectedPhase + phaseError - oldPhase)) /
|
Chris@1045
|
103 (2.0 * M_PI * incr);
|
Chris@275
|
104
|
Chris@275
|
105 return true;
|
Chris@275
|
106 }
|
Chris@275
|
107
|
Chris@275
|
108 FFTModel::PeakLocationSet
|
Chris@929
|
109 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
|
Chris@275
|
110 {
|
Chris@551
|
111 Profiler profiler("FFTModel::getPeaks");
|
Chris@551
|
112
|
Chris@275
|
113 FFTModel::PeakLocationSet peaks;
|
Chris@275
|
114 if (!isOK()) return peaks;
|
Chris@275
|
115
|
Chris@275
|
116 if (ymax == 0 || ymax > getHeight() - 1) {
|
Chris@275
|
117 ymax = getHeight() - 1;
|
Chris@275
|
118 }
|
Chris@275
|
119
|
Chris@275
|
120 if (type == AllPeaks) {
|
Chris@551
|
121 int minbin = ymin;
|
Chris@551
|
122 if (minbin > 0) minbin = minbin - 1;
|
Chris@551
|
123 int maxbin = ymax;
|
Chris@551
|
124 if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
|
Chris@551
|
125 const int n = maxbin - minbin + 1;
|
Chris@608
|
126 #ifdef __GNUC__
|
Chris@551
|
127 float values[n];
|
Chris@608
|
128 #else
|
Chris@608
|
129 float *values = (float *)alloca(n * sizeof(float));
|
Chris@608
|
130 #endif
|
Chris@551
|
131 getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
|
Chris@929
|
132 for (int bin = ymin; bin <= ymax; ++bin) {
|
Chris@551
|
133 if (bin == minbin || bin == maxbin) continue;
|
Chris@551
|
134 if (values[bin - minbin] > values[bin - minbin - 1] &&
|
Chris@551
|
135 values[bin - minbin] > values[bin - minbin + 1]) {
|
Chris@275
|
136 peaks.insert(bin);
|
Chris@275
|
137 }
|
Chris@275
|
138 }
|
Chris@275
|
139 return peaks;
|
Chris@275
|
140 }
|
Chris@275
|
141
|
Chris@551
|
142 Column values = getColumn(x);
|
Chris@275
|
143
|
Chris@500
|
144 float mean = 0.f;
|
Chris@551
|
145 for (int i = 0; i < values.size(); ++i) mean += values[i];
|
Chris@1038
|
146 if (values.size() > 0) mean = mean / float(values.size());
|
Chris@1038
|
147
|
Chris@275
|
148 // For peak picking we use a moving median window, picking the
|
Chris@275
|
149 // highest value within each continuous region of values that
|
Chris@275
|
150 // exceed the median. For pitch adaptivity, we adjust the window
|
Chris@275
|
151 // size to a roughly constant pitch range (about four tones).
|
Chris@275
|
152
|
Chris@1040
|
153 sv_samplerate_t sampleRate = getSampleRate();
|
Chris@275
|
154
|
Chris@1090
|
155 deque<float> window;
|
Chris@1090
|
156 vector<int> inrange;
|
Chris@280
|
157 float dist = 0.5;
|
Chris@500
|
158
|
Chris@929
|
159 int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
|
Chris@929
|
160 int halfWin = medianWinSize/2;
|
Chris@275
|
161
|
Chris@929
|
162 int binmin;
|
Chris@275
|
163 if (ymin > halfWin) binmin = ymin - halfWin;
|
Chris@275
|
164 else binmin = 0;
|
Chris@275
|
165
|
Chris@929
|
166 int binmax;
|
Chris@275
|
167 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
|
Chris@275
|
168 else binmax = values.size()-1;
|
Chris@275
|
169
|
Chris@929
|
170 int prevcentre = 0;
|
Chris@500
|
171
|
Chris@929
|
172 for (int bin = binmin; bin <= binmax; ++bin) {
|
Chris@275
|
173
|
Chris@275
|
174 float value = values[bin];
|
Chris@275
|
175
|
Chris@275
|
176 window.push_back(value);
|
Chris@275
|
177
|
Chris@280
|
178 // so-called median will actually be the dist*100'th percentile
|
Chris@280
|
179 medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
|
Chris@275
|
180 halfWin = medianWinSize/2;
|
Chris@275
|
181
|
Chris@929
|
182 while ((int)window.size() > medianWinSize) {
|
Chris@500
|
183 window.pop_front();
|
Chris@500
|
184 }
|
Chris@500
|
185
|
Chris@1038
|
186 int actualSize = int(window.size());
|
Chris@275
|
187
|
Chris@275
|
188 if (type == MajorPitchAdaptivePeaks) {
|
Chris@275
|
189 if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
|
Chris@275
|
190 else binmax = values.size()-1;
|
Chris@275
|
191 }
|
Chris@275
|
192
|
Chris@1090
|
193 deque<float> sorted(window);
|
Chris@1090
|
194 sort(sorted.begin(), sorted.end());
|
Chris@1038
|
195 float median = sorted[int(float(sorted.size()) * dist)];
|
Chris@275
|
196
|
Chris@929
|
197 int centrebin = 0;
|
Chris@500
|
198 if (bin > actualSize/2) centrebin = bin - actualSize/2;
|
Chris@500
|
199
|
Chris@500
|
200 while (centrebin > prevcentre || bin == binmin) {
|
Chris@275
|
201
|
Chris@500
|
202 if (centrebin > prevcentre) ++prevcentre;
|
Chris@500
|
203
|
Chris@500
|
204 float centre = values[prevcentre];
|
Chris@500
|
205
|
Chris@500
|
206 if (centre > median) {
|
Chris@500
|
207 inrange.push_back(centrebin);
|
Chris@500
|
208 }
|
Chris@500
|
209
|
Chris@500
|
210 if (centre <= median || centrebin+1 == values.size()) {
|
Chris@500
|
211 if (!inrange.empty()) {
|
Chris@929
|
212 int peakbin = 0;
|
Chris@500
|
213 float peakval = 0.f;
|
Chris@929
|
214 for (int i = 0; i < (int)inrange.size(); ++i) {
|
Chris@500
|
215 if (i == 0 || values[inrange[i]] > peakval) {
|
Chris@500
|
216 peakval = values[inrange[i]];
|
Chris@500
|
217 peakbin = inrange[i];
|
Chris@500
|
218 }
|
Chris@500
|
219 }
|
Chris@500
|
220 inrange.clear();
|
Chris@500
|
221 if (peakbin >= ymin && peakbin <= ymax) {
|
Chris@500
|
222 peaks.insert(peakbin);
|
Chris@275
|
223 }
|
Chris@275
|
224 }
|
Chris@275
|
225 }
|
Chris@500
|
226
|
Chris@500
|
227 if (bin == binmin) break;
|
Chris@275
|
228 }
|
Chris@275
|
229 }
|
Chris@275
|
230
|
Chris@275
|
231 return peaks;
|
Chris@275
|
232 }
|
Chris@275
|
233
|
Chris@929
|
234 int
|
Chris@1040
|
235 FFTModel::getPeakPickWindowSize(PeakPickType type, sv_samplerate_t sampleRate,
|
Chris@929
|
236 int bin, float &percentile) const
|
Chris@275
|
237 {
|
Chris@280
|
238 percentile = 0.5;
|
Chris@275
|
239 if (type == MajorPeaks) return 10;
|
Chris@275
|
240 if (bin == 0) return 3;
|
Chris@280
|
241
|
Chris@1090
|
242 double binfreq = (getSampleRate() * bin) / m_fftSize;
|
Chris@1038
|
243 double hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
|
Chris@280
|
244
|
Chris@1090
|
245 int hibin = int(lrint((hifreq * m_fftSize) / getSampleRate()));
|
Chris@275
|
246 int medianWinSize = hibin - bin;
|
Chris@275
|
247 if (medianWinSize < 3) medianWinSize = 3;
|
Chris@280
|
248
|
Chris@1090
|
249 percentile = 0.5f + float(binfreq / getSampleRate());
|
Chris@280
|
250
|
Chris@275
|
251 return medianWinSize;
|
Chris@275
|
252 }
|
Chris@275
|
253
|
Chris@275
|
254 FFTModel::PeakSet
|
Chris@929
|
255 FFTModel::getPeakFrequencies(PeakPickType type, int x,
|
Chris@929
|
256 int ymin, int ymax)
|
Chris@275
|
257 {
|
Chris@551
|
258 Profiler profiler("FFTModel::getPeakFrequencies");
|
Chris@551
|
259
|
Chris@275
|
260 PeakSet peaks;
|
Chris@275
|
261 if (!isOK()) return peaks;
|
Chris@275
|
262 PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
|
Chris@275
|
263
|
Chris@1040
|
264 sv_samplerate_t sampleRate = getSampleRate();
|
Chris@929
|
265 int incr = getResolution();
|
Chris@275
|
266
|
Chris@275
|
267 // This duplicates some of the work of estimateStableFrequency to
|
Chris@275
|
268 // allow us to retrieve the phases in two separate vertical
|
Chris@275
|
269 // columns, instead of jumping back and forth between columns x and
|
Chris@275
|
270 // x+1, which may be significantly slower if re-seeking is needed
|
Chris@275
|
271
|
Chris@1090
|
272 vector<float> phases;
|
Chris@275
|
273 for (PeakLocationSet::iterator i = locations.begin();
|
Chris@275
|
274 i != locations.end(); ++i) {
|
Chris@275
|
275 phases.push_back(getPhaseAt(x, *i));
|
Chris@275
|
276 }
|
Chris@275
|
277
|
Chris@929
|
278 int phaseIndex = 0;
|
Chris@275
|
279 for (PeakLocationSet::iterator i = locations.begin();
|
Chris@275
|
280 i != locations.end(); ++i) {
|
Chris@1038
|
281 double oldPhase = phases[phaseIndex];
|
Chris@1038
|
282 double newPhase = getPhaseAt(x+1, *i);
|
Chris@1090
|
283 double expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / m_fftSize;
|
Chris@1038
|
284 double phaseError = princarg(newPhase - expectedPhase);
|
Chris@1038
|
285 double frequency =
|
Chris@275
|
286 (sampleRate * (expectedPhase + phaseError - oldPhase))
|
Chris@275
|
287 / (2 * M_PI * incr);
|
Chris@1045
|
288 peaks[*i] = frequency;
|
Chris@275
|
289 ++phaseIndex;
|
Chris@275
|
290 }
|
Chris@275
|
291
|
Chris@275
|
292 return peaks;
|
Chris@275
|
293 }
|
Chris@275
|
294
|