Mercurial > hg > silvet
comparison src/Silvet.cpp @ 178:a53c713b2a4a
Restore (and fix) the time-frequency distribution output (filtered CQ); tidy up the bottom-octave handling in draft mode
author | Chris Cannam |
---|---|
date | Thu, 22 May 2014 12:11:33 +0100 |
parents | 8af9b6cd7451 |
children | 825193ef09d2 |
comparison legend: equal | deleted | inserted | replaced
177:d53d95bee46c | 178:a53c713b2a4a |
---|---|
39 m_instruments(InstrumentPack::listInstrumentPacks()), | 39 m_instruments(InstrumentPack::listInstrumentPacks()), |
40 m_resampler(0), | 40 m_resampler(0), |
41 m_cq(0), | 41 m_cq(0), |
42 m_hqMode(true), | 42 m_hqMode(true), |
43 m_fineTuning(false), | 43 m_fineTuning(false), |
44 m_instrument(0) | 44 m_instrument(0), |
45 m_colsPerSec(50) | |
45 { | 46 { |
46 } | 47 } |
47 | 48 |
48 Silvet::~Silvet() | 49 Silvet::~Silvet() |
49 { | 50 { |
238 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | 239 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62); |
239 d.hasDuration = true; | 240 d.hasDuration = true; |
240 m_notesOutputNo = list.size(); | 241 m_notesOutputNo = list.size(); |
241 list.push_back(d); | 242 list.push_back(d); |
242 | 243 |
244 d.identifier = "timefreq"; | |
245 d.name = "Time-frequency distribution"; | |
246 d.description = "Filtered constant-Q time-frequency distribution used as input to the expectation-maximisation algorithm"; | |
247 d.unit = ""; | |
248 d.hasFixedBinCount = true; | |
249 d.binCount = m_instruments[0].templateHeight; | |
250 d.binNames.clear(); | |
251 if (m_cq) { | |
252 char name[20]; | |
253 for (int i = 0; i < m_instruments[0].templateHeight; ++i) { | |
254 // We have a 600-bin (10 oct 60-bin CQ) of which the | |
255 // lowest-frequency 55 bins have been dropped, for a | |
256 // 545-bin template. The native CQ bins go high->low | |
257 // frequency though, so these are still the first 545 bins | |
258 // as reported by getBinFrequency, though in reverse order | |
259 float freq = m_cq->getBinFrequency | |
260 (m_instruments[0].templateHeight - i - 1); | |
261 sprintf(name, "%.1f Hz", freq); | |
262 d.binNames.push_back(name); | |
263 } | |
264 } | |
265 d.hasKnownExtents = false; | |
266 d.isQuantized = false; | |
267 d.sampleType = OutputDescriptor::FixedSampleRate; | |
268 d.sampleRate = m_colsPerSec; | |
269 d.hasDuration = false; | |
270 m_fcqOutputNo = list.size(); | |
271 list.push_back(d); | |
272 | |
243 return list; | 273 return list; |
244 } | 274 } |
245 | 275 |
246 std::string | 276 std::string |
247 Silvet::noteName(int note, int shift, int shiftCount) const | 277 Silvet::noteName(int note, int shift, int shiftCount) const |
383 if (m_resampler) { | 413 if (m_resampler) { |
384 data = m_resampler->process(data.data(), data.size()); | 414 data = m_resampler->process(data.data(), data.size()); |
385 } | 415 } |
386 | 416 |
387 Grid cqout = m_cq->process(data); | 417 Grid cqout = m_cq->process(data); |
388 | |
389 if (!m_hqMode) { | |
390 // Our CQ is one octave shorter in draft mode, so pad with | |
391 // zeros | |
392 vector<double> octave(m_cq->getBinsPerOctave(), 0.0); | |
393 for (int i = 0; i < int(cqout.size()); ++i) { | |
394 cqout[i].insert(cqout[i].end(), octave.begin(), octave.end()); | |
395 } | |
396 } | |
397 | |
398 FeatureSet fs = transcribe(cqout); | 418 FeatureSet fs = transcribe(cqout); |
399 return fs; | 419 return fs; |
400 } | 420 } |
401 | 421 |
402 Silvet::FeatureSet | 422 Silvet::FeatureSet |
415 FeatureSet fs; | 435 FeatureSet fs; |
416 | 436 |
417 if (filtered.empty()) return fs; | 437 if (filtered.empty()) return fs; |
418 | 438 |
419 const InstrumentPack &pack = m_instruments[m_instrument]; | 439 const InstrumentPack &pack = m_instruments[m_instrument]; |
440 | |
441 for (int i = 0; i < (int)filtered.size(); ++i) { | |
442 Feature f; | |
443 for (int j = 0; j < pack.templateHeight; ++j) { | |
444 f.values.push_back(float(filtered[i][j])); | |
445 } | |
446 fs[m_fcqOutputNo].push_back(f); | |
447 } | |
420 | 448 |
421 int width = filtered.size(); | 449 int width = filtered.size(); |
422 | 450 |
423 int iterations = m_hqMode ? 20 : 10; | 451 int iterations = m_hqMode ? 20 : 10; |
424 | 452 |
539 | 567 |
540 if (select) { | 568 if (select) { |
541 vector<double> inCol = in[i]; | 569 vector<double> inCol = in[i]; |
542 vector<double> outCol(pack.templateHeight); | 570 vector<double> outCol(pack.templateHeight); |
543 | 571 |
544 // we reverse the column as we go (the CQ output is | 572 // In HQ mode, the CQ returns 600 bins and we ignore the |
545 // "upside-down", with high frequencies at the start of | 573 // lowest 55 of them. |
546 // each column, and we want it the other way around) and | 574 // |
547 // then ignore the first 55 (lowest-frequency) bins, | 575 // In draft mode the CQ is an octave shorter, returning |
548 // giving us 545 bins instead of 600 | 576 // 540 bins, so we instead pad them with an additional 5 |
549 | 577 // zeros. |
550 for (int j = 0; j < pack.templateHeight; ++j) { | 578 // |
551 int ix = inCol.size() - j - 55; | 579 // We also need to reverse the column as we go, since the |
552 outCol[j] = inCol[ix]; | 580 // raw CQ has the high frequencies first and we need it |
581 // the other way around. | |
582 | |
583 if (m_hqMode) { | |
584 for (int j = 0; j < pack.templateHeight; ++j) { | |
585 int ix = inCol.size() - j - 55; | |
586 outCol[j] = inCol[ix]; | |
587 } | |
588 } else { | |
589 for (int j = 0; j < 5; ++j) { | |
590 outCol[j] = 0.0; | |
591 } | |
592 for (int j = 5; j < pack.templateHeight; ++j) { | |
593 int ix = inCol.size() - j + 4; | |
594 outCol[j] = inCol[ix]; | |
595 } | |
553 } | 596 } |
554 | 597 |
555 vector<double> noiseLevel1 = | 598 vector<double> noiseLevel1 = |
556 MedianFilter<double>::filter(40, outCol); | 599 MedianFilter<double>::filter(40, outCol); |
557 for (int j = 0; j < pack.templateHeight; ++j) { | 600 for (int j = 0; j < pack.templateHeight; ++j) { |