annotate plugins/AdaptiveSpectrogram.cpp @ 114:496e6d6eb413

* Add "coarse" option
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 21 May 2009 16:40:24 +0000
parents d0920575b48a
children 4883c6fbbb82
rev   line source
c@92 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@92 2
c@92 3 /*
c@92 4 QM Vamp Plugin Set
c@92 5
c@92 6 Centre for Digital Music, Queen Mary, University of London.
c@92 7 All rights reserved.
c@92 8 */
c@92 9
c@92 10 #include "AdaptiveSpectrogram.h"
c@92 11
c@92 12 #include <cstdlib>
c@92 13 #include <cstring>
c@114 14 #include <cfloat>
c@92 15
c@92 16 #include <iostream>
c@92 17
c@92 18 #include <dsp/transforms/FFT.h>
c@92 19
c@92 20 using std::string;
c@92 21 using std::vector;
c@92 22 using std::cerr;
c@92 23 using std::endl;
c@92 24
c@92 25 using Vamp::RealTime;
c@92 26
c@99 27 //#define DEBUG_VERBOSE 1
c@99 28
c@92 29 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
c@92 30 Plugin(inputSampleRate),
c@104 31 m_w(8),
c@114 32 m_n(2),
c@114 33 m_coarse(false),
c@109 34 m_threaded(true),
c@109 35 m_threadsInUse(false)
c@92 36 {
c@92 37 }
c@92 38
c@92 39 AdaptiveSpectrogram::~AdaptiveSpectrogram()
c@92 40 {
c@104 41 for (int i = 0; i < m_cutThreads.size(); ++i) {
c@104 42 delete m_cutThreads[i];
c@104 43 }
c@104 44 m_cutThreads.clear();
c@105 45
c@110 46 for (FFTMap::iterator i = m_fftThreads.begin();
c@110 47 i != m_fftThreads.end(); ++i) {
c@106 48 delete i->second;
c@105 49 }
c@105 50 m_fftThreads.clear();
c@92 51 }
c@92 52
c@92 53 string
c@92 54 AdaptiveSpectrogram::getIdentifier() const
c@92 55 {
c@93 56 return "qm-adaptivespectrogram";
c@92 57 }
c@92 58
c@92 59 string
c@92 60 AdaptiveSpectrogram::getName() const
c@92 61 {
c@92 62 return "Adaptive Spectrogram";
c@92 63 }
c@92 64
c@92 65 string
c@92 66 AdaptiveSpectrogram::getDescription() const
c@92 67 {
c@92 68 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
c@92 69 }
c@92 70
c@92 71 string
c@92 72 AdaptiveSpectrogram::getMaker() const
c@92 73 {
c@92 74 return "Queen Mary, University of London";
c@92 75 }
c@92 76
c@92 77 int
c@92 78 AdaptiveSpectrogram::getPluginVersion() const
c@92 79 {
c@92 80 return 1;
c@92 81 }
c@92 82
c@92 83 string
c@92 84 AdaptiveSpectrogram::getCopyright() const
c@92 85 {
c@92 86 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
c@92 87 }
c@92 88
c@92 89 size_t
c@92 90 AdaptiveSpectrogram::getPreferredStepSize() const
c@92 91 {
c@92 92 return ((2 << m_w) << m_n) / 2;
c@92 93 }
c@92 94
c@92 95 size_t
c@92 96 AdaptiveSpectrogram::getPreferredBlockSize() const
c@92 97 {
c@92 98 return (2 << m_w) << m_n;
c@92 99 }
c@92 100
c@92 101 bool
c@92 102 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
c@92 103 {
c@92 104 if (channels < getMinChannelCount() ||
c@92 105 channels > getMaxChannelCount()) return false;
c@92 106
c@92 107 return true;
c@92 108 }
c@92 109
c@92 110 void
c@92 111 AdaptiveSpectrogram::reset()
c@92 112 {
c@92 113
c@92 114 }
c@92 115
c@92 116 AdaptiveSpectrogram::ParameterList
c@92 117 AdaptiveSpectrogram::getParameterDescriptors() const
c@92 118 {
c@92 119 ParameterList list;
c@92 120
c@92 121 ParameterDescriptor desc;
c@92 122 desc.identifier = "n";
c@92 123 desc.name = "Number of resolutions";
c@114 124 desc.description = "Number of consecutive powers of two in the range to be used as spectrogram resolutions, starting with the minimum resolution specified";
c@92 125 desc.unit = "";
c@114 126 desc.minValue = 2;
c@92 127 desc.maxValue = 10;
c@114 128 desc.defaultValue = 3;
c@92 129 desc.isQuantized = true;
c@92 130 desc.quantizeStep = 1;
c@92 131 list.push_back(desc);
c@92 132
c@92 133 ParameterDescriptor desc2;
c@92 134 desc2.identifier = "w";
c@92 135 desc2.name = "Smallest resolution";
c@92 136 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
c@92 137 desc2.unit = "";
c@92 138 desc2.minValue = 1;
c@92 139 desc2.maxValue = 14;
c@104 140 desc2.defaultValue = 9;
c@92 141 desc2.isQuantized = true;
c@92 142 desc2.quantizeStep = 1;
c@92 143 // I am so lazy
c@92 144 desc2.valueNames.push_back("2");
c@92 145 desc2.valueNames.push_back("4");
c@92 146 desc2.valueNames.push_back("8");
c@92 147 desc2.valueNames.push_back("16");
c@92 148 desc2.valueNames.push_back("32");
c@92 149 desc2.valueNames.push_back("64");
c@92 150 desc2.valueNames.push_back("128");
c@92 151 desc2.valueNames.push_back("256");
c@92 152 desc2.valueNames.push_back("512");
c@92 153 desc2.valueNames.push_back("1024");
c@92 154 desc2.valueNames.push_back("2048");
c@92 155 desc2.valueNames.push_back("4096");
c@92 156 desc2.valueNames.push_back("8192");
c@92 157 desc2.valueNames.push_back("16384");
c@92 158 list.push_back(desc2);
c@92 159
c@109 160 ParameterDescriptor desc3;
c@114 161 desc3.identifier = "coarse";
c@114 162 desc3.name = "Omit alternate resolutions";
c@114 163 desc3.description = "Generate a coarser spectrogram faster by excluding every alternate resolution (first and last resolution are always retained)";
c@114 164 desc3.unit = "";
c@114 165 desc3.minValue = 0;
c@114 166 desc3.maxValue = 1;
c@114 167 desc3.defaultValue = 0;
c@114 168 desc3.isQuantized = true;
c@114 169 desc3.quantizeStep = 1;
c@114 170 list.push_back(desc3);
c@114 171
c@109 172 desc3.identifier = "threaded";
c@109 173 desc3.name = "Multi-threaded processing";
c@110 174 desc3.description = "Perform calculations using several threads in parallel";
c@109 175 desc3.unit = "";
c@109 176 desc3.minValue = 0;
c@109 177 desc3.maxValue = 1;
c@109 178 desc3.defaultValue = 1;
c@109 179 desc3.isQuantized = true;
c@109 180 desc3.quantizeStep = 1;
c@109 181 list.push_back(desc3);
c@109 182
c@92 183 return list;
c@92 184 }
c@92 185
c@92 186 float
c@92 187 AdaptiveSpectrogram::getParameter(std::string id) const
c@92 188 {
c@92 189 if (id == "n") return m_n+1;
c@92 190 else if (id == "w") return m_w+1;
c@109 191 else if (id == "threaded") return (m_threaded ? 1 : 0);
c@114 192 else if (id == "coarse") return (m_coarse ? 1 : 0);
c@92 193 return 0.f;
c@92 194 }
c@92 195
c@92 196 void
c@92 197 AdaptiveSpectrogram::setParameter(std::string id, float value)
c@92 198 {
c@92 199 if (id == "n") {
c@92 200 int n = lrintf(value);
c@92 201 if (n >= 1 && n <= 10) m_n = n-1;
c@92 202 } else if (id == "w") {
c@92 203 int w = lrintf(value);
c@92 204 if (w >= 1 && w <= 14) m_w = w-1;
c@109 205 } else if (id == "threaded") {
c@109 206 m_threaded = (value > 0.5);
c@114 207 } else if (id == "coarse") {
c@114 208 m_coarse = (value > 0.5);
c@109 209 }
c@92 210 }
c@92 211
c@92 212 AdaptiveSpectrogram::OutputList
c@92 213 AdaptiveSpectrogram::getOutputDescriptors() const
c@92 214 {
c@92 215 OutputList list;
c@92 216
c@92 217 OutputDescriptor d;
c@92 218 d.identifier = "output";
c@92 219 d.name = "Output";
c@92 220 d.description = "The output of the plugin";
c@92 221 d.unit = "";
c@92 222 d.hasFixedBinCount = true;
c@114 223 d.binCount = getPreferredBlockSize() / 2;
c@92 224 d.hasKnownExtents = false;
c@92 225 d.isQuantized = false;
c@92 226 d.sampleType = OutputDescriptor::FixedSampleRate;
c@92 227 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
c@92 228 d.hasDuration = false;
c@112 229 char name[20];
c@112 230 for (int i = 0; i < d.binCount; ++i) {
c@114 231 float freq = (m_inputSampleRate / (d.binCount * 2)) * (i + 1); // no DC bin
c@112 232 sprintf(name, "%d Hz", int(freq));
c@112 233 d.binNames.push_back(name);
c@112 234 }
c@92 235 list.push_back(d);
c@92 236
c@92 237 return list;
c@92 238 }
c@92 239
c@92 240 AdaptiveSpectrogram::FeatureSet
c@92 241 AdaptiveSpectrogram::getRemainingFeatures()
c@92 242 {
c@92 243 FeatureSet fs;
c@92 244 return fs;
c@92 245 }
c@92 246
c@100 247 AdaptiveSpectrogram::FeatureSet
c@100 248 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
c@100 249 {
c@100 250 FeatureSet fs;
c@100 251
c@100 252 int minwid = (2 << m_w), maxwid = ((2 << m_w) << m_n);
c@100 253
c@101 254 #ifdef DEBUG_VERBOSE
c@100 255 cerr << "widths from " << minwid << " to " << maxwid << " ("
c@100 256 << minwid/2 << " to " << maxwid/2 << " in real parts)" << endl;
c@101 257 #endif
c@100 258
c@100 259 Spectrograms s(minwid/2, maxwid/2, 1);
c@100 260
c@100 261 int w = minwid;
c@100 262 int index = 0;
c@100 263
c@100 264 while (w <= maxwid) {
c@114 265
c@114 266 if (!isResolutionWanted(s, w/2)) {
c@114 267 w *= 2;
c@114 268 ++index;
c@114 269 continue;
c@114 270 }
c@114 271
c@106 272 if (m_fftThreads.find(w) == m_fftThreads.end()) {
c@106 273 m_fftThreads[w] = new FFTThread(w);
c@106 274 }
c@109 275 if (m_threaded) {
c@114 276 m_fftThreads[w]->startCalculation
c@114 277 (inputBuffers[0], s, index, maxwid);
c@109 278 } else {
c@114 279 m_fftThreads[w]->setParameters
c@114 280 (inputBuffers[0], s, index, maxwid);
c@109 281 m_fftThreads[w]->performTask();
c@109 282 }
c@100 283 w *= 2;
c@100 284 ++index;
c@100 285 }
c@100 286
c@109 287 if (m_threaded) {
c@109 288 w = minwid;
c@114 289 index = 0;
c@109 290 while (w <= maxwid) {
c@114 291 if (!isResolutionWanted(s, w/2)) {
c@114 292 w *= 2;
c@114 293 ++index;
c@114 294 continue;
c@114 295 }
c@109 296 m_fftThreads[w]->await();
c@109 297 w *= 2;
c@114 298 ++index;
c@109 299 }
c@105 300 }
c@102 301
c@109 302 m_threadsInUse = false;
c@104 303
c@114 304 // std::cerr << "maxwid/2 = " << maxwid/2 << ", minwid/2 = " << minwid/2 << ", n+1 = " << m_n+1 << ", 2^(n+1) = " << (2<<m_n) << std::endl;
c@110 305
c@114 306 int cutwid = maxwid/2;
c@114 307 Cutting *cutting = cut(s, cutwid, 0, 0, cutwid, 0);
c@100 308
c@101 309 #ifdef DEBUG_VERBOSE
c@100 310 printCutting(cutting, " ");
c@101 311 #endif
c@100 312
c@100 313 vector<vector<float> > rmat(maxwid/minwid);
c@100 314 for (int i = 0; i < maxwid/minwid; ++i) {
c@100 315 rmat[i] = vector<float>(maxwid/2);
c@100 316 }
c@100 317
c@114 318 assemble(s, cutting, rmat, 0, 0, maxwid/minwid, cutwid);
c@100 319
c@110 320 cutting->erase();
c@100 321
c@100 322 for (int i = 0; i < rmat.size(); ++i) {
c@100 323 Feature f;
c@100 324 f.hasTimestamp = false;
c@100 325 f.values = rmat[i];
c@100 326 fs[0].push_back(f);
c@100 327 }
c@100 328
c@104 329 // std::cerr << "process returning!\n" << std::endl;
c@104 330
c@100 331 return fs;
c@100 332 }
c@100 333
c@100 334 void
c@104 335 AdaptiveSpectrogram::printCutting(Cutting *c, string pfx) const
c@100 336 {
c@100 337 if (c->first) {
c@100 338 if (c->cut == Cutting::Horizontal) {
c@100 339 cerr << pfx << "H" << endl;
c@100 340 } else if (c->cut == Cutting::Vertical) {
c@100 341 cerr << pfx << "V" << endl;
c@100 342 }
c@100 343 printCutting(c->first, pfx + " ");
c@100 344 printCutting(c->second, pfx + " ");
c@100 345 } else {
c@100 346 cerr << pfx << "* " << c->value << endl;
c@100 347 }
c@100 348 }
c@100 349
c@104 350 void
c@104 351 AdaptiveSpectrogram::getSubCuts(const Spectrograms &s,
c@104 352 int res,
c@104 353 int x, int y, int h,
c@114 354 Cutting **top, Cutting **bottom,
c@114 355 Cutting **left, Cutting **right,
c@113 356 BlockAllocator *allocator) const
c@104 357 {
c@109 358 if (m_threaded && !m_threadsInUse) {
c@104 359
c@109 360 m_threadsInUse = true;
c@104 361
c@104 362 if (m_cutThreads.empty()) {
c@104 363 for (int i = 0; i < 4; ++i) {
c@104 364 CutThread *t = new CutThread(this);
c@104 365 m_cutThreads.push_back(t);
c@104 366 }
c@104 367 }
c@104 368
c@109 369 // Cut threads 0 and 1 calculate the top and bottom halves;
c@110 370 // threads 2 and 3 calculate left and right. See notes in
c@110 371 // unthreaded code below for more information.
c@104 372
c@114 373 if (top) m_cutThreads[0]->cut(s, res, x, y + h/2, h/2);
c@114 374 if (bottom) m_cutThreads[1]->cut(s, res, x, y, h/2);
c@104 375
c@114 376 if (left) m_cutThreads[2]->cut(s, res/2, 2 * x, y/2, h/2);
c@114 377 if (right) m_cutThreads[3]->cut(s, res/2, 2 * x + 1, y/2, h/2);
c@114 378
c@114 379 if (top) *top = m_cutThreads[0]->get();
c@114 380 if (bottom) *bottom = m_cutThreads[1]->get();
c@114 381 if (left) *left = m_cutThreads[2]->get();
c@114 382 if (right) *right = m_cutThreads[3]->get();
c@104 383
c@104 384 } else {
c@104 385
c@110 386 // Unthreaded version
c@104 387
c@104 388 // The "vertical" division is a top/bottom split.
c@104 389 // Splitting this way keeps us in the same resolution,
c@104 390 // but with two vertical subregions of height h/2.
c@104 391
c@114 392 if (top) *top = cut(s, res, x, y + h/2, h/2, allocator);
c@114 393 if (bottom) *bottom = cut(s, res, x, y, h/2, allocator);
c@104 394
c@104 395 // The "horizontal" division is a left/right split. Splitting
c@104 396 // this way places us in resolution res/2, which has lower
c@104 397 // vertical resolution but higher horizontal resolution. We
c@104 398 // need to double x accordingly.
c@104 399
c@114 400 if (left) *left = cut(s, res/2, 2 * x, y/2, h/2, allocator);
c@114 401 if (right) *right = cut(s, res/2, 2 * x + 1, y/2, h/2, allocator);
c@104 402 }
c@104 403 }
c@104 404
c@100 405 AdaptiveSpectrogram::Cutting *
c@100 406 AdaptiveSpectrogram::cut(const Spectrograms &s,
c@100 407 int res,
c@110 408 int x, int y, int h,
c@110 409 BlockAllocator *allocator) const
c@100 410 {
c@100 411 // cerr << "res = " << res << ", x = " << x << ", y = " << y << ", h = " << h << endl;
c@100 412
c@110 413 Cutting *cutting;
c@110 414 if (allocator) {
c@110 415 cutting = (Cutting *)(allocator->allocate());
c@110 416 cutting->allocator = allocator;
c@110 417 } else {
c@110 418 cutting = new Cutting;
c@110 419 cutting->allocator = 0;
c@110 420 }
c@110 421
c@100 422 if (h > 1 && res > s.minres) {
c@100 423
c@114 424 if (!isResolutionWanted(s, res)) {
c@100 425
c@114 426 Cutting *left = 0, *right = 0;
c@114 427 getSubCuts(s, res, x, y, h, 0, 0, &left, &right, allocator);
c@114 428
c@114 429 double hcost = left->cost + right->cost;
c@101 430 double henergy = left->value + right->value;
c@114 431 hcost = normalize(hcost, henergy);
c@114 432
c@100 433 cutting->cut = Cutting::Horizontal;
c@100 434 cutting->first = left;
c@100 435 cutting->second = right;
c@100 436 cutting->cost = hcost;
c@111 437 cutting->value = left->value + right->value;
c@100 438
c@114 439 } else if (h == 2 && !isResolutionWanted(s, res/2)) {
c@100 440
c@114 441 Cutting *top = 0, *bottom = 0;
c@114 442 getSubCuts(s, res, x, y, h, &top, &bottom, 0, 0, allocator);
c@114 443
c@114 444 double vcost = top->cost + bottom->cost;
c@114 445 double venergy = top->value + bottom->value;
c@114 446 vcost = normalize(vcost, venergy);
c@114 447
c@100 448 cutting->cut = Cutting::Vertical;
c@100 449 cutting->first = top;
c@100 450 cutting->second = bottom;
c@100 451 cutting->cost = vcost;
c@111 452 cutting->value = top->value + bottom->value;
c@114 453
c@114 454 } else {
c@114 455
c@114 456 Cutting *top = 0, *bottom = 0, *left = 0, *right = 0;
c@114 457 getSubCuts(s, res, x, y, h, &top, &bottom, &left, &right, allocator);
c@114 458
c@114 459 double vcost = top->cost + bottom->cost;
c@114 460 double venergy = top->value + bottom->value;
c@114 461 vcost = normalize(vcost, venergy);
c@114 462
c@114 463 double hcost = left->cost + right->cost;
c@114 464 double henergy = left->value + right->value;
c@114 465 hcost = normalize(hcost, henergy);
c@114 466
c@114 467 if (vcost > hcost) {
c@114 468 cutting->cut = Cutting::Horizontal;
c@114 469 cutting->first = left;
c@114 470 cutting->second = right;
c@114 471 cutting->cost = hcost;
c@114 472 cutting->value = left->value + right->value;
c@114 473 top->erase();
c@114 474 bottom->erase();
c@114 475 return cutting;
c@114 476 } else {
c@114 477 cutting->cut = Cutting::Vertical;
c@114 478 cutting->first = top;
c@114 479 cutting->second = bottom;
c@114 480 cutting->cost = vcost;
c@114 481 cutting->value = top->value + bottom->value;
c@114 482 left->erase();
c@114 483 right->erase();
c@114 484 return cutting;
c@114 485 }
c@100 486 }
c@100 487
c@100 488 } else {
c@100 489
c@100 490 // no cuts possible from this level
c@100 491
c@100 492 cutting->cut = Cutting::Finished;
c@100 493 cutting->first = 0;
c@100 494 cutting->second = 0;
c@100 495
c@100 496 int n = 0;
c@114 497 for (int r = res; r > s.minres; r >>= 1) ++n;
c@100 498 const Spectrogram *spectrogram = s.spectrograms[n];
c@100 499 cutting->cost = cost(*spectrogram, x, y);
c@100 500 cutting->value = value(*spectrogram, x, y);
c@114 501 }
c@100 502
c@114 503 return cutting;
c@100 504 }
c@100 505
c@100 506 void
c@100 507 AdaptiveSpectrogram::assemble(const Spectrograms &s,
c@100 508 const Cutting *cutting,
c@100 509 vector<vector<float> > &rmat,
c@104 510 int x, int y, int w, int h) const
c@100 511 {
c@100 512 switch (cutting->cut) {
c@100 513
c@100 514 case Cutting::Finished:
c@100 515 for (int i = 0; i < w; ++i) {
c@100 516 for (int j = 0; j < h; ++j) {
c@114 517 rmat[x+i][y+j] = cutting->value;
c@100 518 }
c@100 519 }
c@100 520 return;
c@100 521
c@100 522 case Cutting::Horizontal:
c@100 523 assemble(s, cutting->first, rmat, x, y, w/2, h);
c@100 524 assemble(s, cutting->second, rmat, x+w/2, y, w/2, h);
c@100 525 break;
c@100 526
c@100 527 case Cutting::Vertical:
c@100 528 assemble(s, cutting->first, rmat, x, y+h/2, w, h/2);
c@100 529 assemble(s, cutting->second, rmat, x, y, w, h/2);
c@100 530 break;
c@100 531 }
c@100 532 }
c@100 533