Mercurial > hg > silvet
comparison src/Silvet.cpp @ 32:da54468cc452
Filter the constant-Q spectrogram in a similar manner to the MATLAB version
author | Chris Cannam |
---|---|
date | Fri, 04 Apr 2014 13:29:33 +0100 |
parents | c6d230c31713 |
children | e08c330a761d |
comparison
equal
deleted
inserted
replaced
31:c6d230c31713 | 32:da54468cc452 |
---|---|
15 | 15 |
16 #include "Silvet.h" | 16 #include "Silvet.h" |
17 | 17 |
18 #include "data/include/templates.h" | 18 #include "data/include/templates.h" |
19 | 19 |
20 #include "maths/MedianFilter.h" | |
20 #include "dsp/rateconversion/Resampler.h" | 21 #include "dsp/rateconversion/Resampler.h" |
21 | 22 |
22 #include "constant-q-cpp/cpp-qm-dsp/ConstantQ.h" | 23 #include "constant-q-cpp/cpp-qm-dsp/CQInterpolated.h" |
23 | 24 |
24 #include <vector> | 25 #include <vector> |
26 | |
27 #include <cstdio> | |
25 | 28 |
26 using std::vector; | 29 using std::vector; |
27 using std::cerr; | 30 using std::cerr; |
28 using std::endl; | 31 using std::endl; |
29 | 32 |
30 static int processingSampleRate = 44100; | 33 static int processingSampleRate = 44100; |
31 static int processingBPO = 60; | 34 static int processingBPO = 60; |
32 | 35 static int processingHeight = 545; |
33 | 36 |
34 Silvet::Silvet(float inputSampleRate) : | 37 Silvet::Silvet(float inputSampleRate) : |
35 Plugin(inputSampleRate), | 38 Plugin(inputSampleRate), |
36 m_resampler(0), | 39 m_resampler(0), |
37 m_cq(0) | 40 m_cq(0) |
40 | 43 |
41 Silvet::~Silvet() | 44 Silvet::~Silvet() |
42 { | 45 { |
43 delete m_resampler; | 46 delete m_resampler; |
44 delete m_cq; | 47 delete m_cq; |
48 for (int i = 0; i < (int)m_filterA.size(); ++i) { | |
49 delete m_filterA[i]; | |
50 delete m_filterB[i]; | |
51 } | |
45 } | 52 } |
46 | 53 |
47 string | 54 string |
48 Silvet::getIdentifier() const | 55 Silvet::getIdentifier() const |
49 { | 56 { |
167 d.binNames.push_back("Frequency"); | 174 d.binNames.push_back("Frequency"); |
168 d.binNames.push_back("Velocity"); | 175 d.binNames.push_back("Velocity"); |
169 d.hasKnownExtents = false; | 176 d.hasKnownExtents = false; |
170 d.isQuantized = false; | 177 d.isQuantized = false; |
171 d.sampleType = OutputDescriptor::VariableSampleRate; | 178 d.sampleType = OutputDescriptor::VariableSampleRate; |
172 d.sampleRate = 0; | 179 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 256); |
173 d.hasDuration = true; | 180 d.hasDuration = true; |
181 m_notesOutputNo = list.size(); | |
182 list.push_back(d); | |
183 | |
184 d.identifier = "inputgrid"; | |
185 d.name = "Filtered time-frequency grid"; | |
186 d.description = "The pre-processed constant-Q time-frequency distribution used as input to the PLCA step"; | |
187 d.unit = ""; | |
188 d.hasFixedBinCount = true; | |
189 d.binCount = processingHeight; | |
190 d.binNames.clear(); | |
191 if (m_cq) { | |
192 char name[20]; | |
193 for (int i = 0; i < processingHeight; ++i) { | |
194 float freq = m_cq->getBinFrequency(i + 55); | |
195 sprintf(name, "%.1f Hz", freq); | |
196 d.binNames.push_back(name); | |
197 } | |
198 } | |
199 d.hasKnownExtents = false; | |
200 d.isQuantized = false; | |
201 d.sampleType = OutputDescriptor::FixedSampleRate; | |
202 d.sampleRate = 25; | |
203 d.hasDuration = false; | |
204 m_cqOutputNo = list.size(); | |
174 list.push_back(d); | 205 list.push_back(d); |
175 | 206 |
176 return list; | 207 return list; |
177 } | 208 } |
178 | 209 |
205 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate); | 236 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate); |
206 } else { | 237 } else { |
207 m_resampler = 0; | 238 m_resampler = 0; |
208 } | 239 } |
209 | 240 |
210 m_cq = new ConstantQ | 241 m_cq = new CQInterpolated |
211 (processingSampleRate, 27.5, processingSampleRate / 3, processingBPO); | 242 (processingSampleRate, 27.5, processingSampleRate / 3, processingBPO, |
212 | 243 CQInterpolated::Linear); |
244 | |
245 for (int i = 0; i < (int)m_filterA.size(); ++i) { | |
246 delete m_filterA[i]; | |
247 delete m_filterB[i]; | |
248 } | |
249 m_filterA.clear(); | |
250 m_filterB.clear(); | |
251 for (int i = 0; i < processingHeight; ++i) { | |
252 m_filterA.push_back(new MedianFilter<double>(40)); | |
253 m_filterB.push_back(new MedianFilter<double>(40)); | |
254 } | |
255 m_columnCount = 0; | |
256 m_reducedColumnCount = 0; | |
213 } | 257 } |
214 | 258 |
215 Silvet::FeatureSet | 259 Silvet::FeatureSet |
216 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 260 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
217 { | 261 { |
220 | 264 |
221 if (m_resampler) { | 265 if (m_resampler) { |
222 data = m_resampler->process(data.data(), data.size()); | 266 data = m_resampler->process(data.data(), data.size()); |
223 } | 267 } |
224 | 268 |
225 vector<vector<double> > cqout = m_cq->process(data); | 269 Grid cqout = m_cq->process(data); |
226 | 270 Grid filtered = preProcess(cqout); |
227 return FeatureSet(); | 271 |
272 FeatureSet fs; | |
273 | |
274 for (int i = 0; i < (int)filtered.size(); ++i) { | |
275 Feature f; | |
276 for (int j = 0; j < processingHeight; ++j) { | |
277 f.values.push_back(float(filtered[i][j])); | |
278 } | |
279 fs[m_cqOutputNo].push_back(f); | |
280 } | |
281 | |
282 return fs; | |
228 } | 283 } |
229 | 284 |
230 Silvet::FeatureSet | 285 Silvet::FeatureSet |
231 Silvet::getRemainingFeatures() | 286 Silvet::getRemainingFeatures() |
232 { | 287 { |
233 | 288 |
234 return FeatureSet(); | 289 return FeatureSet(); |
235 } | 290 } |
236 | 291 |
292 Silvet::Grid | |
293 Silvet::preProcess(const Grid &in) | |
294 { | |
295 int width = in.size(); | |
296 | |
297 // reduce to 100 columns per second, or one column every 441 samples | |
298 | |
299 int spacing = processingSampleRate / 100; | |
300 | |
301 Grid out; | |
302 | |
303 for (int i = 0; i < width; ++i) { | |
304 | |
305 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop(); | |
306 int sampleNo = m_columnCount * m_cq->getColumnHop(); | |
307 | |
308 bool select = (sampleNo / spacing != prevSampleNo / spacing); | |
309 | |
310 if (select) { | |
311 vector<double> inCol = in[i]; | |
312 vector<double> outCol(processingHeight); | |
313 | |
314 // we reverse the column as we go (the CQ output is | |
315 // "upside-down", with high frequencies at the start of | |
316 // each column, and we want it the other way around) and | |
317 // then ignore the first 55 (lowest-frequency) bins, | |
318 // giving us 545 bins instead of 600 | |
319 | |
320 for (int j = 0; j < processingHeight; ++j) { | |
321 | |
322 int ix = inCol.size() - j - 55; | |
323 | |
324 double val = inCol[ix]; | |
325 m_filterA[j]->push(val); | |
326 | |
327 double a = m_filterA[j]->get(); | |
328 m_filterB[j]->push(std::min(a, val)); | |
329 | |
330 double filtered = m_filterB[j]->get(); | |
331 outCol[j] = filtered; | |
332 } | |
333 | |
334 // then we only use every fourth filtered column, for 25 | |
335 // columns per second in the eventual grid | |
336 | |
337 if (m_reducedColumnCount % 4 == 0) { | |
338 out.push_back(outCol); | |
339 } | |
340 | |
341 ++m_reducedColumnCount; | |
342 } | |
343 | |
344 ++m_columnCount; | |
345 } | |
346 | |
347 return out; | |
348 } | |
349 |