comparison src/Silvet.cpp @ 178:a53c713b2a4a

Restore (and fix) the time-frequency distribution output (filtered CQ); tidy up the bottom-octave handling in draft mode
author Chris Cannam
date Thu, 22 May 2014 12:11:33 +0100
parents 8af9b6cd7451
children 825193ef09d2
comparison
equal deleted inserted replaced
177:d53d95bee46c 178:a53c713b2a4a
39 m_instruments(InstrumentPack::listInstrumentPacks()), 39 m_instruments(InstrumentPack::listInstrumentPacks()),
40 m_resampler(0), 40 m_resampler(0),
41 m_cq(0), 41 m_cq(0),
42 m_hqMode(true), 42 m_hqMode(true),
43 m_fineTuning(false), 43 m_fineTuning(false),
44 m_instrument(0) 44 m_instrument(0),
45 m_colsPerSec(50)
45 { 46 {
46 } 47 }
47 48
48 Silvet::~Silvet() 49 Silvet::~Silvet()
49 { 50 {
238 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62); 239 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
239 d.hasDuration = true; 240 d.hasDuration = true;
240 m_notesOutputNo = list.size(); 241 m_notesOutputNo = list.size();
241 list.push_back(d); 242 list.push_back(d);
242 243
244 d.identifier = "timefreq";
245 d.name = "Time-frequency distribution";
246 d.description = "Filtered constant-Q time-frequency distribution used as input to the expectation-maximisation algorithm";
247 d.unit = "";
248 d.hasFixedBinCount = true;
249 d.binCount = m_instruments[0].templateHeight;
250 d.binNames.clear();
251 if (m_cq) {
252 char name[20];
253 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
254 // In HQ mode the CQ has 600 bins (10 octaves of 60 bins
255 // each), of which the lowest-frequency 55 are dropped to
256 // give the 545-bin template. The native CQ bins run from
257 // high to low frequency, so the template covers the first
258 // 545 bins reported by getBinFrequency, in reverse order
259 float freq = m_cq->getBinFrequency
260 (m_instruments[0].templateHeight - i - 1);
261 sprintf(name, "%.1f Hz", freq);
262 d.binNames.push_back(name);
263 }
264 }
265 d.hasKnownExtents = false;
266 d.isQuantized = false;
267 d.sampleType = OutputDescriptor::FixedSampleRate;
268 d.sampleRate = m_colsPerSec;
269 d.hasDuration = false;
270 m_fcqOutputNo = list.size();
271 list.push_back(d);
272
243 return list; 273 return list;
244 } 274 }
245 275
246 std::string 276 std::string
247 Silvet::noteName(int note, int shift, int shiftCount) const 277 Silvet::noteName(int note, int shift, int shiftCount) const
383 if (m_resampler) { 413 if (m_resampler) {
384 data = m_resampler->process(data.data(), data.size()); 414 data = m_resampler->process(data.data(), data.size());
385 } 415 }
386 416
387 Grid cqout = m_cq->process(data); 417 Grid cqout = m_cq->process(data);
388
389 if (!m_hqMode) {
390 // Our CQ is one octave shorter in draft mode, so pad with
391 // zeros
392 vector<double> octave(m_cq->getBinsPerOctave(), 0.0);
393 for (int i = 0; i < int(cqout.size()); ++i) {
394 cqout[i].insert(cqout[i].end(), octave.begin(), octave.end());
395 }
396 }
397
398 FeatureSet fs = transcribe(cqout); 418 FeatureSet fs = transcribe(cqout);
399 return fs; 419 return fs;
400 } 420 }
401 421
402 Silvet::FeatureSet 422 Silvet::FeatureSet
415 FeatureSet fs; 435 FeatureSet fs;
416 436
417 if (filtered.empty()) return fs; 437 if (filtered.empty()) return fs;
418 438
419 const InstrumentPack &pack = m_instruments[m_instrument]; 439 const InstrumentPack &pack = m_instruments[m_instrument];
440
441 for (int i = 0; i < (int)filtered.size(); ++i) {
442 Feature f;
443 for (int j = 0; j < pack.templateHeight; ++j) {
444 f.values.push_back(float(filtered[i][j]));
445 }
446 fs[m_fcqOutputNo].push_back(f);
447 }
420 448
421 int width = filtered.size(); 449 int width = filtered.size();
422 450
423 int iterations = m_hqMode ? 20 : 10; 451 int iterations = m_hqMode ? 20 : 10;
424 452
539 567
540 if (select) { 568 if (select) {
541 vector<double> inCol = in[i]; 569 vector<double> inCol = in[i];
542 vector<double> outCol(pack.templateHeight); 570 vector<double> outCol(pack.templateHeight);
543 571
544 // we reverse the column as we go (the CQ output is 572 // In HQ mode, the CQ returns 600 bins and we ignore the
545 // "upside-down", with high frequencies at the start of 573 // lowest 55 of them.
546 // each column, and we want it the other way around) and 574 //
547 // then ignore the first 55 (lowest-frequency) bins, 575 // In draft mode the CQ is an octave shorter, returning
548 // giving us 545 bins instead of 600 576 // 540 bins, so we instead pad them with an additional 5
549 577 // zeros.
550 for (int j = 0; j < pack.templateHeight; ++j) { 578 //
551 int ix = inCol.size() - j - 55; 579 // We also need to reverse the column as we go, since the
552 outCol[j] = inCol[ix]; 580 // raw CQ has the high frequencies first and we need it
581 // the other way around.
582
583 if (m_hqMode) {
584 for (int j = 0; j < pack.templateHeight; ++j) {
585 int ix = inCol.size() - j - 55;
586 outCol[j] = inCol[ix];
587 }
588 } else {
589 for (int j = 0; j < 5; ++j) {
590 outCol[j] = 0.0;
591 }
592 for (int j = 5; j < pack.templateHeight; ++j) {
593 int ix = inCol.size() - j + 4;
594 outCol[j] = inCol[ix];
595 }
553 } 596 }
554 597
555 vector<double> noiseLevel1 = 598 vector<double> noiseLevel1 =
556 MedianFilter<double>::filter(40, outCol); 599 MedianFilter<double>::filter(40, outCol);
557 for (int j = 0; j < pack.templateHeight; ++j) { 600 for (int j = 0; j < pack.templateHeight; ++j) {