annotate plugins/AdaptiveSpectrogram.cpp @ 100:bae940a2ff18

* some experiments in working from the original paper for the adaptive spectrogram (to be continued)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 27 Apr 2009 15:30:05 +0000
parents 8700a93424f4
children ff383bfee463
rev   line source
c@92 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@92 2
c@92 3 /*
c@92 4 QM Vamp Plugin Set
c@92 5
c@92 6 Centre for Digital Music, Queen Mary, University of London.
c@92 7 All rights reserved.
c@92 8 */
c@92 9
c@92 10 #include "AdaptiveSpectrogram.h"
c@92 11
c@92 12 #include <cstdlib>
c@92 13 #include <cstring>
c@92 14
c@92 15 #include <iostream>
c@92 16
c@92 17 #include <dsp/transforms/FFT.h>
c@92 18
c@92 19 using std::string;
c@92 20 using std::vector;
c@92 21 using std::cerr;
c@92 22 using std::endl;
c@92 23
c@92 24 using Vamp::RealTime;
c@92 25
c@99 26 //#define DEBUG_VERBOSE 1
c@99 27
c@92 28 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
c@92 29 Plugin(inputSampleRate),
c@92 30 m_w(9),
c@92 31 m_n(2)
c@100 32 // m_w(0),
c@100 33 // m_n(2)
c@92 34 {
c@92 35 }
c@92 36
c@92 37 AdaptiveSpectrogram::~AdaptiveSpectrogram()
c@92 38 {
c@92 39 }
c@92 40
c@92 41 string
c@92 42 AdaptiveSpectrogram::getIdentifier() const
c@92 43 {
c@93 44 return "qm-adaptivespectrogram";
c@92 45 }
c@92 46
c@92 47 string
c@92 48 AdaptiveSpectrogram::getName() const
c@92 49 {
c@92 50 return "Adaptive Spectrogram";
c@92 51 }
c@92 52
c@92 53 string
c@92 54 AdaptiveSpectrogram::getDescription() const
c@92 55 {
c@92 56 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
c@92 57 }
c@92 58
c@92 59 string
c@92 60 AdaptiveSpectrogram::getMaker() const
c@92 61 {
c@92 62 return "Queen Mary, University of London";
c@92 63 }
c@92 64
c@92 65 int
c@92 66 AdaptiveSpectrogram::getPluginVersion() const
c@92 67 {
c@92 68 return 1;
c@92 69 }
c@92 70
c@92 71 string
c@92 72 AdaptiveSpectrogram::getCopyright() const
c@92 73 {
c@92 74 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
c@92 75 }
c@92 76
c@92 77 size_t
c@92 78 AdaptiveSpectrogram::getPreferredStepSize() const
c@92 79 {
c@92 80 return ((2 << m_w) << m_n) / 2;
c@92 81 }
c@92 82
c@92 83 size_t
c@92 84 AdaptiveSpectrogram::getPreferredBlockSize() const
c@92 85 {
c@92 86 return (2 << m_w) << m_n;
c@92 87 }
c@92 88
c@92 89 bool
c@92 90 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
c@92 91 {
c@92 92 if (channels < getMinChannelCount() ||
c@92 93 channels > getMaxChannelCount()) return false;
c@92 94
c@92 95 return true;
c@92 96 }
c@92 97
c@92 98 void
c@92 99 AdaptiveSpectrogram::reset()
c@92 100 {
c@92 101
c@92 102 }
c@92 103
c@92 104 AdaptiveSpectrogram::ParameterList
c@92 105 AdaptiveSpectrogram::getParameterDescriptors() const
c@92 106 {
c@92 107 ParameterList list;
c@92 108
c@92 109 ParameterDescriptor desc;
c@92 110 desc.identifier = "n";
c@92 111 desc.name = "Number of resolutions";
c@92 112 desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified";
c@92 113 desc.unit = "";
c@92 114 desc.minValue = 1;
c@92 115 desc.maxValue = 10;
c@92 116 desc.defaultValue = 3;
c@92 117 desc.isQuantized = true;
c@92 118 desc.quantizeStep = 1;
c@92 119 list.push_back(desc);
c@92 120
c@92 121 ParameterDescriptor desc2;
c@92 122 desc2.identifier = "w";
c@92 123 desc2.name = "Smallest resolution";
c@92 124 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
c@92 125 desc2.unit = "";
c@92 126 desc2.minValue = 1;
c@92 127 desc2.maxValue = 14;
c@92 128 desc2.defaultValue = 10;
c@92 129 desc2.isQuantized = true;
c@92 130 desc2.quantizeStep = 1;
c@92 131 // I am so lazy
c@92 132 desc2.valueNames.push_back("2");
c@92 133 desc2.valueNames.push_back("4");
c@92 134 desc2.valueNames.push_back("8");
c@92 135 desc2.valueNames.push_back("16");
c@92 136 desc2.valueNames.push_back("32");
c@92 137 desc2.valueNames.push_back("64");
c@92 138 desc2.valueNames.push_back("128");
c@92 139 desc2.valueNames.push_back("256");
c@92 140 desc2.valueNames.push_back("512");
c@92 141 desc2.valueNames.push_back("1024");
c@92 142 desc2.valueNames.push_back("2048");
c@92 143 desc2.valueNames.push_back("4096");
c@92 144 desc2.valueNames.push_back("8192");
c@92 145 desc2.valueNames.push_back("16384");
c@92 146 list.push_back(desc2);
c@92 147
c@92 148 return list;
c@92 149 }
c@92 150
c@92 151 float
c@92 152 AdaptiveSpectrogram::getParameter(std::string id) const
c@92 153 {
c@92 154 if (id == "n") return m_n+1;
c@92 155 else if (id == "w") return m_w+1;
c@92 156 return 0.f;
c@92 157 }
c@92 158
c@92 159 void
c@92 160 AdaptiveSpectrogram::setParameter(std::string id, float value)
c@92 161 {
c@92 162 if (id == "n") {
c@92 163 int n = lrintf(value);
c@92 164 if (n >= 1 && n <= 10) m_n = n-1;
c@92 165 } else if (id == "w") {
c@92 166 int w = lrintf(value);
c@92 167 if (w >= 1 && w <= 14) m_w = w-1;
c@92 168 }
c@92 169 }
c@92 170
c@92 171 AdaptiveSpectrogram::OutputList
c@92 172 AdaptiveSpectrogram::getOutputDescriptors() const
c@92 173 {
c@92 174 OutputList list;
c@92 175
c@92 176 OutputDescriptor d;
c@92 177 d.identifier = "output";
c@92 178 d.name = "Output";
c@92 179 d.description = "The output of the plugin";
c@92 180 d.unit = "";
c@92 181 d.hasFixedBinCount = true;
c@92 182 d.binCount = ((2 << m_w) << m_n) / 2;
c@92 183 d.hasKnownExtents = false;
c@92 184 d.isQuantized = false;
c@92 185 d.sampleType = OutputDescriptor::FixedSampleRate;
c@92 186 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
c@92 187 d.hasDuration = false;
c@92 188 list.push_back(d);
c@92 189
c@92 190 return list;
c@92 191 }
c@92 192
c@92 193 AdaptiveSpectrogram::FeatureSet
c@92 194 AdaptiveSpectrogram::getRemainingFeatures()
c@92 195 {
c@92 196 FeatureSet fs;
c@92 197 return fs;
c@92 198 }
c@92 199
c@100 200 #ifdef NOT_DEFINED
c@92 201 AdaptiveSpectrogram::FeatureSet
c@92 202 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
c@92 203 {
c@92 204 FeatureSet fs;
c@92 205
c@92 206 int wid = (2 << m_w), WID = ((2 << m_w) << m_n);
c@92 207 int Res = log2(WID/wid)+1;
c@92 208 double ***specs = new double **[Res];
c@92 209 int Wid = WID;
c@92 210 int wi = 0;
c@92 211
c@99 212 #ifdef DEBUG_VERBOSE
c@92 213 cerr << "wid = " << wid << ", WID = " << WID << endl;
c@99 214 #endif
c@92 215
c@92 216 double *tmpin = new double[WID];
c@92 217 double *tmprout = new double[WID];
c@92 218 double *tmpiout = new double[WID];
c@92 219
c@92 220 while (Wid >= wid) {
c@92 221 specs[wi] = new double *[WID/Wid];
c@92 222 for (int i = 0; i < WID/Wid; ++i) {
c@92 223 specs[wi][i] = new double[Wid/2];
c@92 224 int origin = WID/4 - Wid/4; // for 50% overlap
c@92 225 for (int j = 0; j < Wid; ++j) {
c@92 226 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid);
c@92 227 tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul;
c@92 228 }
c@92 229 FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout);
c@92 230 for (int j = 0; j < Wid/2; ++j) {
c@99 231 int k = j+1; // include Nyquist but not DC
c@99 232 double mag = sqrt(tmprout[k] * tmprout[k] +
c@99 233 tmpiout[k] * tmpiout[k]);
c@99 234 double scaled = mag / (Wid/2);
c@99 235 // double power = scaled*scaled;
c@99 236 // if (k < Wid/2) power = power*2;
c@99 237 specs[wi][i][j] = scaled;
c@92 238 }
c@92 239 }
c@92 240 Wid /= 2;
c@92 241 ++wi;
c@92 242 }
c@92 243
c@99 244 /*
c@99 245 while (Wid >= wid) {
c@99 246 specs[wi] = new double *[WID/Wid];
c@99 247 cerr << "filling width " << Wid << endl;
c@99 248 for (int i = 0; i < WID/Wid; ++i) {
c@99 249 specs[wi][i] = new double[Wid/2];
c@99 250 for (int j = 0; j < Wid/2; ++j) {
c@99 251
c@99 252 specs[wi][i][j] = 0;
c@99 253 int x0 = i * Wid/2;
c@99 254 int x1 = (i+1) * Wid/2 - 1;
c@99 255 int y0 = j * (WID/Wid);
c@99 256 int y1 = (j+1) * (WID/Wid) - 1;
c@99 257
c@99 258 cerr << "box at " << i << "," << j << " covers [" << x0 << "," << y0 << "] to [" << x1 << "," << y1 << "]" << endl;
c@99 259
c@99 260 for (int y = WID/4; y < WID/2; ++y) {
c@99 261 for (int x = WID/4-2; x < WID/4; ++x) {
c@99 262 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
c@99 263 ++specs[wi][i][j];
c@99 264 }
c@99 265 }
c@99 266 }
c@99 267
c@99 268 for (int x = 0; x < WID/2; ++x) {
c@99 269 int y = 0;
c@99 270 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
c@99 271 ++specs[wi][i][j];
c@99 272 }
c@99 273 }
c@99 274 }
c@99 275 }
c@99 276 cerr << "this spectrogram:" << endl;
c@99 277 for (int j = Wid/2-1; j >= 0; --j) {
c@99 278 for (int i = 0; i < WID/Wid; ++i) {
c@99 279 cerr << specs[wi][i][j] << " ";
c@99 280 }
c@99 281 cerr << endl;
c@99 282 }
c@99 283 Wid /= 2;
c@99 284 ++wi;
c@99 285 }
c@99 286 */
c@99 287
c@92 288 int *spl = new int[WID/2];
c@92 289 double *spec = new double[WID/2];
c@92 290
c@92 291 // This prefill makes it easy to see which elements are actually
c@92 292 // set by the MixSpectrogramBlock2 call. Turns out that, with
c@92 293 // 1024, 2048 and 4096 as our widths, the spl array has elements
c@92 294 // 0-4094 (incl) filled in and the spec array has elements 0-4095
c@92 295
c@92 296 for (int i = 0; i < WID/2; ++i) {
c@92 297 spl[i] = i;
c@92 298 spec[i] = i;
c@92 299 }
c@92 300
c@92 301 MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false);
c@92 302
c@92 303 Wid = WID;
c@92 304 wi = 0;
c@92 305 while (Wid >= wid) {
c@92 306 for (int i = 0; i < WID/Wid; ++i) {
c@92 307 delete[] specs[wi][i];
c@92 308 }
c@92 309 delete[] specs[wi];
c@92 310 Wid /= 2;
c@92 311 ++wi;
c@92 312 }
c@92 313 delete[] specs;
c@92 314
c@99 315 #ifdef DEBUG_VERBOSE
c@92 316 std::cerr << "Results at " << ts << ":" << std::endl;
c@99 317 for (int i = 0; i < WID/2; ++i) {
c@99 318 // if (spl[i] == i || spec[i] == i) {
c@99 319 // std::cerr << "\n***\n";
c@99 320 // }
c@92 321 std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " ";
c@92 322 }
c@92 323 std::cerr << std::endl;
c@99 324 #endif
c@99 325
c@92 326 vector<vector<float> > rmat(WID/wid);
c@92 327 for (int i = 0; i < WID/wid; ++i) {
c@92 328 rmat[i] = vector<float>(WID/2);
c@92 329 }
c@92 330
c@92 331 int y = 0, h = WID/2;
c@92 332 int x = 0, w = WID/wid;
c@92 333 unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID);
c@92 334
c@92 335 delete[] spec;
c@92 336 delete[] spl;
c@92 337
c@92 338 for (int i = 0; i < rmat.size(); ++i) {
c@92 339 Feature f;
c@92 340 f.hasTimestamp = false;
c@92 341 f.values = rmat[i];
c@92 342 fs[0].push_back(f);
c@92 343 }
c@92 344
c@92 345 /*
c@92 346 if (m_stepSize == 0) {
c@92 347 cerr << "ERROR: AdaptiveSpectrogram::process: "
c@92 348 << "AdaptiveSpectrogram has not been initialised"
c@92 349 << endl;
c@92 350 return fs;
c@92 351 }
c@92 352 */
c@92 353 return fs;
c@92 354 }
c@100 355 #endif
c@92 356
c@100 357 AdaptiveSpectrogram::FeatureSet
c@100 358 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
c@100 359 {
c@100 360 FeatureSet fs;
c@100 361
c@100 362 int minwid = (2 << m_w), maxwid = ((2 << m_w) << m_n);
c@100 363
c@100 364 cerr << "widths from " << minwid << " to " << maxwid << " ("
c@100 365 << minwid/2 << " to " << maxwid/2 << " in real parts)" << endl;
c@100 366
c@100 367 Spectrograms s(minwid/2, maxwid/2, 1);
c@100 368
c@100 369 double *tmpin = new double[maxwid];
c@100 370 double *tmprout = new double[maxwid];
c@100 371 double *tmpiout = new double[maxwid];
c@100 372
c@100 373 int w = minwid;
c@100 374 int index = 0;
c@100 375
c@100 376 while (w <= maxwid) {
c@100 377 for (int i = 0; i < maxwid / w; ++i) {
c@100 378 int origin = maxwid/4 - w/4; // for 50% overlap
c@100 379 for (int j = 0; j < w; ++j) {
c@100 380 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / w);
c@100 381 tmpin[j] = inputBuffers[0][origin + i * w/2 + j] * mul;
c@100 382 }
c@100 383 FFT::process(w, false, tmpin, 0, tmprout, tmpiout);
c@100 384 for (int j = 0; j < w/2; ++j) {
c@100 385 int k = j+1; // include Nyquist but not DC
c@100 386 double mag = sqrt(tmprout[k] * tmprout[k] +
c@100 387 tmpiout[k] * tmpiout[k]);
c@100 388 double scaled = mag / (w/2);
c@100 389 s.spectrograms[index]->data[i][j] = scaled;
c@100 390 }
c@100 391 }
c@100 392 w *= 2;
c@100 393 ++index;
c@100 394 }
c@100 395
c@100 396 Cutting *cutting = cut(s, maxwid/2, 0, 0, maxwid/2);
c@100 397
c@100 398 printCutting(cutting, " ");
c@100 399
c@100 400 vector<vector<float> > rmat(maxwid/minwid);
c@100 401 for (int i = 0; i < maxwid/minwid; ++i) {
c@100 402 rmat[i] = vector<float>(maxwid/2);
c@100 403 }
c@100 404
c@100 405 assemble(s, cutting, rmat, 0, 0, maxwid/minwid, maxwid/2);
c@100 406
c@100 407 delete cutting;
c@100 408
c@100 409 for (int i = 0; i < rmat.size(); ++i) {
c@100 410 Feature f;
c@100 411 f.hasTimestamp = false;
c@100 412 f.values = rmat[i];
c@100 413 fs[0].push_back(f);
c@100 414 }
c@100 415
c@100 416 return fs;
c@100 417 }
c@100 418
c@100 419 void
c@100 420 AdaptiveSpectrogram::printCutting(Cutting *c, string pfx)
c@100 421 {
c@100 422 if (c->first) {
c@100 423 if (c->cut == Cutting::Horizontal) {
c@100 424 cerr << pfx << "H" << endl;
c@100 425 } else if (c->cut == Cutting::Vertical) {
c@100 426 cerr << pfx << "V" << endl;
c@100 427 }
c@100 428 printCutting(c->first, pfx + " ");
c@100 429 printCutting(c->second, pfx + " ");
c@100 430 } else {
c@100 431 cerr << pfx << "* " << c->value << endl;
c@100 432 }
c@100 433 }
c@100 434
c@100 435 AdaptiveSpectrogram::Cutting *
c@100 436 AdaptiveSpectrogram::cut(const Spectrograms &s,
c@100 437 int res,
c@100 438 int x, int y, int h)
c@100 439 {
c@100 440 // cerr << "res = " << res << ", x = " << x << ", y = " << y << ", h = " << h << endl;
c@100 441
c@100 442 if (h > 1 && res > s.minres) {
c@100 443
c@100 444 // The "vertical" division is a top/bottom split.
c@100 445 // Splitting this way keeps us in the same resolution,
c@100 446 // but with two vertical subregions of height h/2.
c@100 447
c@100 448 Cutting *top = cut(s, res, x, y + h/2, h/2);
c@100 449 Cutting *bottom = cut(s, res, x, y, h/2);
c@100 450
c@100 451 // The "horizontal" division is a left/right split. Splitting
c@100 452 // this way places us in resolution res/2, which has lower
c@100 453 // vertical resolution but higher horizontal resolution. We
c@100 454 // need to double x accordingly.
c@100 455
c@100 456 Cutting *left = cut(s, res/2, 2 * x, y/2, h/2);
c@100 457 Cutting *right = cut(s, res/2, 2 * x + 1, y/2, h/2);
c@100 458
c@100 459 double vcost = top->cost + bottom->cost;
c@100 460 double hcost = left->cost + right->cost;
c@100 461
c@100 462 if (vcost > hcost) {
c@100 463
c@100 464 // cut horizontally (left/right)
c@100 465
c@100 466 Cutting *cutting = new Cutting;
c@100 467 cutting->cut = Cutting::Horizontal;
c@100 468 cutting->first = left;
c@100 469 cutting->second = right;
c@100 470 cutting->cost = hcost;
c@100 471 cutting->value = left->value + right->value;
c@100 472 delete top;
c@100 473 delete bottom;
c@100 474 return cutting;
c@100 475
c@100 476 } else {
c@100 477
c@100 478 Cutting *cutting = new Cutting;
c@100 479 cutting->cut = Cutting::Vertical;
c@100 480 cutting->first = top;
c@100 481 cutting->second = bottom;
c@100 482 cutting->cost = vcost;
c@100 483 cutting->value = top->value + bottom->value;
c@100 484 delete left;
c@100 485 delete right;
c@100 486 return cutting;
c@100 487 }
c@100 488
c@100 489 } else {
c@100 490
c@100 491 // no cuts possible from this level
c@100 492
c@100 493 Cutting *cutting = new Cutting;
c@100 494 cutting->cut = Cutting::Finished;
c@100 495 cutting->first = 0;
c@100 496 cutting->second = 0;
c@100 497
c@100 498 int n = 0;
c@100 499 for (int r = res; r > s.minres; r /= 2) ++n;
c@100 500 const Spectrogram *spectrogram = s.spectrograms[n];
c@100 501
c@100 502 cutting->cost = cost(*spectrogram, x, y);
c@100 503 cutting->value = value(*spectrogram, x, y);
c@100 504
c@100 505 // cerr << "cost for this cell: " << cutting->cost << endl;
c@100 506
c@100 507 return cutting;
c@100 508 }
c@100 509 }
c@100 510
c@100 511 void
c@100 512 AdaptiveSpectrogram::assemble(const Spectrograms &s,
c@100 513 const Cutting *cutting,
c@100 514 vector<vector<float> > &rmat,
c@100 515 int x, int y, int w, int h)
c@100 516 {
c@100 517 switch (cutting->cut) {
c@100 518
c@100 519 case Cutting::Finished:
c@100 520 for (int i = 0; i < w; ++i) {
c@100 521 for (int j = 0; j < h; ++j) {
c@100 522 rmat[x+i][y+j] = cutting->value;
c@100 523 }
c@100 524 }
c@100 525 return;
c@100 526
c@100 527 case Cutting::Horizontal:
c@100 528 assemble(s, cutting->first, rmat, x, y, w/2, h);
c@100 529 assemble(s, cutting->second, rmat, x+w/2, y, w/2, h);
c@100 530 break;
c@100 531
c@100 532 case Cutting::Vertical:
c@100 533 assemble(s, cutting->first, rmat, x, y+h/2, w, h/2);
c@100 534 assemble(s, cutting->second, rmat, x, y, w, h/2);
c@100 535 break;
c@100 536 }
c@100 537 }
c@100 538
c@92 539 void
c@92 540 AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat,
c@92 541 int x, int y, int w, int h,
c@92 542 int *spl,
c@92 543 double *spec, int sz,
c@92 544 int res
c@92 545 )
c@92 546 {
c@92 547
c@99 548 #ifdef DEBUG_VERBOSE
c@92 549 cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h
c@92 550 << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res << endl;
c@99 551 #endif
c@92 552
c@92 553 if (sz <= 1) {
c@92 554
c@92 555 for (int i = 0; i < w; ++i) {
c@92 556 for (int j = 0; j < h; ++j) {
c@92 557 if (rmat[x+i][y+j] != 0) {
c@92 558 cerr << "WARNING: Overwriting value " << rmat[x+i][y+j]
c@92 559 << " with " << res + i + j << " at " << x+i << "," << y+j << endl;
c@92 560 }
c@92 561 rmat[x+i][y+j] = *spec;
c@92 562 }
c@92 563 }
c@92 564
c@92 565 return;
c@92 566 }
c@92 567
c@92 568 if (*spl == 0) {
c@92 569
c@92 570 unpackResultMatrix(rmat,
c@92 571 x, y,
c@92 572 w, h/2,
c@92 573 spl + 1,
c@92 574 spec,
c@92 575 sz/2,
c@92 576 res);
c@92 577
c@92 578 unpackResultMatrix(rmat,
c@92 579 x, y + h/2,
c@92 580 w, h/2,
c@92 581 spl + sz/2,
c@92 582 spec + sz/2,
c@92 583 sz/2,
c@92 584 res);
c@92 585
c@92 586 } else if (*spl == 1) {
c@92 587
c@92 588 unpackResultMatrix(rmat,
c@92 589 x, y,
c@92 590 w/2, h,
c@92 591 spl + 1,
c@92 592 spec,
c@92 593 sz/2,
c@92 594 res/2);
c@92 595
c@92 596 unpackResultMatrix(rmat,
c@92 597 x + w/2, y,
c@92 598 w/2, h,
c@92 599 spl + sz/2,
c@92 600 spec + sz/2,
c@92 601 sz/2,
c@92 602 res/2);
c@92 603
c@92 604 } else {
c@92 605 cerr << "ERROR: *spl = " << *spl << endl;
c@92 606 }
c@92 607 }
c@92 608
c@92 609 //spl[Y-1]
c@92 610 //Specs[R0][x0:x0+x-1][Y0:Y0+Y-1]
c@92 611 //Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1]
c@92 612 //...
c@92 613 //Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1]
c@92 614 //N=WID/wid
c@92 615
c@92 616 /**
c@92 617 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl.
c@92 618 */
c@92 619 double
c@92 620 AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0,
c@99 621 int x0, int Y0, int N, double& ene, string pfx)
c@92 622 {
c@92 623 double ent = 0;
c@92 624
c@99 625 #ifdef DEBUG_VERBOSE
c@99 626 cerr << pfx << "cutting with Y = " << Y << ", R0 = " << R0 << ", x0 = " << x0 << ", Y0 = " << Y0 << ", N = " << N << endl;
c@99 627 #endif
c@99 628
c@92 629 if (Y > N) {
c@92 630
c@99 631 #ifdef DEBUG_VERBOSE
c@99 632 cerr << pfx << "Y > N case, making top/bottom cut" << endl;
c@99 633 #endif
c@99 634
c@92 635 spl[0] = 0;
c@92 636 double ene1, ene2;
c@92 637
c@92 638 ent += DoCutSpectrogramBlock2
c@99 639 (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1, pfx + " ");
c@92 640
c@92 641 ent += DoCutSpectrogramBlock2
c@99 642 (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2, pfx + " ");
c@92 643
c@92 644 ene = ene1+ene2;
c@92 645
c@92 646 } else if (N == 1) {
c@92 647
c@92 648 double tmp = Specs[R0][x0][Y0];
c@99 649
c@99 650 #ifdef DEBUG_VERBOSE
c@99 651 cerr << pfx << "N == 1 case (value here = " << tmp << ")" << endl;
c@99 652 #endif
c@99 653
c@92 654 ene = tmp;
c@92 655 ent = xlogx(tmp);
c@92 656
c@92 657 } else {
c@92 658 // Y == N, the square case
c@92 659
c@99 660 #ifdef DEBUG_VERBOSE
c@99 661 cerr << pfx << "Y == N case, testing left/right cut" << endl;
c@99 662 #endif
c@99 663
c@92 664 double enel, ener, enet, eneb, entl, entr, entt, entb;
c@92 665 int* tmpspl = new int[Y];
c@92 666
c@92 667 entl = DoCutSpectrogramBlock2
c@99 668 (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel, pfx + " ");
c@92 669
c@92 670 entr = DoCutSpectrogramBlock2
c@99 671 (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener, pfx + " ");
c@99 672
c@99 673 #ifdef DEBUG_VERBOSE
c@99 674 cerr << pfx << "Y == N case, testing top/bottom cut" << endl;
c@99 675 #endif
c@92 676
c@92 677 entb = DoCutSpectrogramBlock2
c@99 678 (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb, pfx + " ");
c@92 679
c@92 680 entt = DoCutSpectrogramBlock2
c@99 681 (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet, pfx + " ");
c@92 682
c@92 683 double
c@92 684 ene0 = enet + eneb,
c@92 685 ene1 = enel + ener,
c@92 686 ent0 = entt + entb,
c@92 687 ent1 = entl + entr;
c@92 688
c@92 689 // normalize
c@92 690
c@92 691 double eneres = 1 - (ene0+ene1)/2, norment0, norment1;
c@92 692 double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres);
c@92 693
c@92 694 // quasi-global normalization
c@92 695
c@99 696 // norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres);
c@99 697 // norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres);
c@99 698 norment0 = ene0;
c@99 699 norment1 = ene1;
c@92 700
c@92 701 // local normalization
c@92 702
c@92 703 if (norment1 < norment0) {
c@99 704 #ifdef DEBUG_VERBOSE
c@99 705 cerr << pfx << "top/bottom cut wins (" << norment0 << " > " << norment1 << "), returning sum ent " << ent0 << " and ene " << ene0 << endl;
c@99 706 #endif
c@92 707 spl[0] = 0;
c@92 708 ent = ent0, ene = ene0;
c@92 709 memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2));
c@92 710 } else {
c@99 711 #ifdef DEBUG_VERBOSE
c@99 712 cerr << pfx << "left/right cut wins (" << norment1 << " >= " << norment0 << "), returning sum ent " << ent1 << " and ene " << ene1 << endl;
c@99 713 #endif
c@92 714 spl[0] = 1;
c@92 715 ent = ent1, ene = ene1;
c@92 716 }
c@92 717 }
c@92 718 return ent;
c@92 719 }
c@92 720
c@92 721 /**
c@92 722 * DoMixSpectrogramBlock2 collects values from the multiple
c@92 723 * spectrograms Specs into a linear array Spec.
c@92 724 */
c@92 725 double
c@92 726 AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y,
c@92 727 int R0, int x0, int Y0, bool normmix, int res,
c@92 728 double* e)
c@92 729 {
c@92 730 if (Y == 1) {
c@92 731
c@92 732 Spec[0] = Specs[R0][x0][Y0]*e[0];
c@92 733
c@92 734 } else {
c@92 735
c@92 736 double le[32];
c@92 737
c@92 738 if (normmix && Y < (1<<res)) {
c@92 739
c@92 740 for (int i = 0, j = 1, k = Y;
c@92 741 i < res;
c@92 742 i++, j *= 2, k /= 2) {
c@92 743
c@92 744 double lle = 0;
c@92 745
c@92 746 for (int fr = 0; fr < j; fr++) {
c@92 747 for (int n = 0; n < k; n++) {
c@92 748 lle +=
c@92 749 Specs[i+R0][x0+fr][Y0+n] *
c@92 750 Specs[i+R0][x0+fr][Y0+n];
c@92 751 }
c@92 752 }
c@92 753
c@92 754 lle = sqrt(lle)*e[i];
c@92 755
c@92 756 if (i == 0) {
c@92 757 le[0] = lle;
c@92 758 } else if (lle > le[0]*2) {
c@92 759 le[i] = e[i]*le[0]*2/lle;
c@92 760 } else {
c@92 761 le[i] = e[i];
c@92 762 }
c@92 763 }
c@92 764
c@92 765 le[0] = e[0];
c@92 766
c@92 767 } else {
c@92 768
c@92 769 memcpy(le, e, sizeof(double)*res);
c@92 770 }
c@92 771
c@92 772 if (spl[0] == 0) {
c@92 773
c@92 774 int newres;
c@92 775 if (Y >= (1<<res)) newres = res;
c@92 776 else newres = res-1;
c@92 777
c@92 778 DoMixSpectrogramBlock2
c@92 779 (&spl[1], Spec, Specs, Y/2, R0, x0, Y0,
c@92 780 normmix, newres, le);
c@92 781
c@92 782 DoMixSpectrogramBlock2
c@92 783 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2,
c@92 784 normmix, newres, le);
c@92 785
c@92 786 } else {
c@92 787
c@92 788 DoMixSpectrogramBlock2
c@92 789 (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2,
c@92 790 normmix, res-1, &le[1]);
c@92 791
c@92 792 DoMixSpectrogramBlock2
c@92 793 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2,
c@92 794 normmix, res-1, &le[1]);
c@92 795 }
c@92 796 }
c@92 797
c@92 798 return 0;
c@92 799 }
c@92 800
c@92 801 /**
c@92 802 * MixSpectrogramBlock2 calls the two Do...() to do the real work.
c@92 803 *
c@92 804 * At this point:
c@92 805 * spl is... what? the returned "cutting", organised how?
c@92 806 * Spec is... what? the returned spectrogram, organised how?
c@92 807 * Specs is an array of input spectrograms
c@92 808 * WID is the maximum window size
c@92 809 * wid is the minimum window size
c@92 810 * normmix is... what?
c@92 811 */
c@92 812 double
c@92 813 AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int
c@92 814 WID, int wid, bool normmix)
c@92 815 {
c@92 816 double ene[32];
c@92 817
c@92 818 // find the total energy and normalize
c@92 819
c@92 820 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
c@92 821
c@92 822 double lene = 0;
c@92 823
c@92 824 for (int fr = 0; fr < Fr; fr++) {
c@92 825 for (int k = 0; k < Wid; k++) {
c@92 826 lene += Specs[i][fr][k]*Specs[i][fr][k];
c@92 827 }
c@92 828 }
c@92 829
c@92 830 ene[i] = lene;
c@92 831
c@92 832 if (lene != 0) {
c@92 833 double ilene = 1.0/lene;
c@92 834 for (int fr = 0; fr < Fr; fr++) {
c@92 835 for (int k = 0; k < Wid; k++) {
c@92 836 Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene;
c@92 837 }
c@92 838 }
c@92 839 }
c@92 840 }
c@92 841
c@92 842
c@92 843 double result = DoCutSpectrogramBlock2
c@92 844 (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]);
c@92 845
c@92 846 // de-normalize
c@92 847
c@92 848 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
c@92 849 double lene = ene[i];
c@92 850 if (lene != 0) {
c@92 851 for (int fr = 0; fr < Fr; fr++) {
c@92 852 for (int k = 0; k < Wid; k++) {
c@92 853 Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene);
c@92 854 }
c@92 855 }
c@92 856 }
c@92 857 }
c@92 858
c@92 859 double e[32];
c@92 860 for (int i = 0; i < 32; i++) e[i] = 1;
c@92 861
c@92 862 DoMixSpectrogramBlock2
c@92 863 (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e);
c@92 864
c@92 865 return result;
c@92 866 }
c@92 867
c@92 868 /**
c@92 869 * MixSpectrogram2 does the work for Fr frames (the largest frame),
c@92 870 * which basically calls MixSpectrogramBlock2 Fr times.
c@92 871 *
c@92 872 * the 3-D array Specs is the multiple spectrograms calculated with
c@92 873 * window sizes between wid and WID, Specs[0] is the 0th spectrogram,
c@92 874 * etc.
c@92 875 *
c@92 876 * spl and Spec for all frames are returned by MixSpectrogram2, each
c@92 877 * as a 2-D array.
c@92 878 */
c@92 879 double
c@92 880 AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr,
c@92 881 int WID, int wid, bool norm, bool normmix)
c@92 882 {
c@92 883 // totally Fr frames of WID samples
c@92 884 // each frame is divided into wid VERTICAL parts
c@92 885
c@92 886 int Res = log2(WID/wid)+1;
c@92 887 double*** lSpecs = new double**[Res];
c@92 888
c@92 889 for (int i = 0; i < Fr; i++) {
c@92 890
c@92 891 for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) {
c@92 892 lSpecs[j] = &Specs[j][i*nfr];
c@92 893 }
c@92 894
c@92 895 MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm);
c@92 896 }
c@92 897
c@92 898 delete[] lSpecs;
c@92 899 return 0;
c@92 900 }
c@92 901