qm-vamp-plugins: plugins/AdaptiveSpectrogram.cpp annotate

annotate plugins/AdaptiveSpectrogram.cpp @ 100:bae940a2ff18

* some experiments in working from the original paper for the adaptive spectrogram (to be continued)

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Mon, 27 Apr 2009 15:30:05 +0000
parents	8700a93424f4
children	ff383bfee463

rev	line source
c@92	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@92	2
c@92	3 /*
c@92	4 QM Vamp Plugin Set
c@92	5
c@92	6 Centre for Digital Music, Queen Mary, University of London.
c@92	7 All rights reserved.
c@92	8 */
c@92	9
c@92	10 #include "AdaptiveSpectrogram.h"
c@92	11
c@92	12 #include <cstdlib>
c@92	13 #include <cstring>
c@92	14
c@92	15 #include <iostream>
c@92	16
c@92	17 #include <dsp/transforms/FFT.h>
c@92	18
c@92	19 using std::string;
c@92	20 using std::vector;
c@92	21 using std::cerr;
c@92	22 using std::endl;
c@92	23
c@92	24 using Vamp::RealTime;
c@92	25
c@99	26 //#define DEBUG_VERBOSE 1
c@99	27
c@92	28 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
c@92	29 Plugin(inputSampleRate),
c@92	30 m_w(9),
c@92	31 m_n(2)
c@100	32 // m_w(0),
c@100	33 // m_n(2)
c@92	34 {
c@92	35 }
c@92	36
c@92	37 AdaptiveSpectrogram::~AdaptiveSpectrogram()
c@92	38 {
c@92	39 }
c@92	40
c@92	41 string
c@92	42 AdaptiveSpectrogram::getIdentifier() const
c@92	43 {
c@93	44 return "qm-adaptivespectrogram";
c@92	45 }
c@92	46
c@92	47 string
c@92	48 AdaptiveSpectrogram::getName() const
c@92	49 {
c@92	50 return "Adaptive Spectrogram";
c@92	51 }
c@92	52
c@92	53 string
c@92	54 AdaptiveSpectrogram::getDescription() const
c@92	55 {
c@92	56 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
c@92	57 }
c@92	58
c@92	59 string
c@92	60 AdaptiveSpectrogram::getMaker() const
c@92	61 {
c@92	62 return "Queen Mary, University of London";
c@92	63 }
c@92	64
c@92	65 int
c@92	66 AdaptiveSpectrogram::getPluginVersion() const
c@92	67 {
c@92	68 return 1;
c@92	69 }
c@92	70
c@92	71 string
c@92	72 AdaptiveSpectrogram::getCopyright() const
c@92	73 {
c@92	74 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
c@92	75 }
c@92	76
c@92	77 size_t
c@92	78 AdaptiveSpectrogram::getPreferredStepSize() const
c@92	79 {
c@92	80 return ((2 << m_w) << m_n) / 2;
c@92	81 }
c@92	82
c@92	83 size_t
c@92	84 AdaptiveSpectrogram::getPreferredBlockSize() const
c@92	85 {
c@92	86 return (2 << m_w) << m_n;
c@92	87 }
c@92	88
c@92	89 bool
c@92	90 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
c@92	91 {
c@92	92 if (channels < getMinChannelCount() \|\|
c@92	93 channels > getMaxChannelCount()) return false;
c@92	94
c@92	95 return true;
c@92	96 }
c@92	97
c@92	98 void
c@92	99 AdaptiveSpectrogram::reset()
c@92	100 {
c@92	101
c@92	102 }
c@92	103
c@92	104 AdaptiveSpectrogram::ParameterList
c@92	105 AdaptiveSpectrogram::getParameterDescriptors() const
c@92	106 {
c@92	107 ParameterList list;
c@92	108
c@92	109 ParameterDescriptor desc;
c@92	110 desc.identifier = "n";
c@92	111 desc.name = "Number of resolutions";
c@92	112 desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified";
c@92	113 desc.unit = "";
c@92	114 desc.minValue = 1;
c@92	115 desc.maxValue = 10;
c@92	116 desc.defaultValue = 3;
c@92	117 desc.isQuantized = true;
c@92	118 desc.quantizeStep = 1;
c@92	119 list.push_back(desc);
c@92	120
c@92	121 ParameterDescriptor desc2;
c@92	122 desc2.identifier = "w";
c@92	123 desc2.name = "Smallest resolution";
c@92	124 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
c@92	125 desc2.unit = "";
c@92	126 desc2.minValue = 1;
c@92	127 desc2.maxValue = 14;
c@92	128 desc2.defaultValue = 10;
c@92	129 desc2.isQuantized = true;
c@92	130 desc2.quantizeStep = 1;
c@92	131 // I am so lazy
c@92	132 desc2.valueNames.push_back("2");
c@92	133 desc2.valueNames.push_back("4");
c@92	134 desc2.valueNames.push_back("8");
c@92	135 desc2.valueNames.push_back("16");
c@92	136 desc2.valueNames.push_back("32");
c@92	137 desc2.valueNames.push_back("64");
c@92	138 desc2.valueNames.push_back("128");
c@92	139 desc2.valueNames.push_back("256");
c@92	140 desc2.valueNames.push_back("512");
c@92	141 desc2.valueNames.push_back("1024");
c@92	142 desc2.valueNames.push_back("2048");
c@92	143 desc2.valueNames.push_back("4096");
c@92	144 desc2.valueNames.push_back("8192");
c@92	145 desc2.valueNames.push_back("16384");
c@92	146 list.push_back(desc2);
c@92	147
c@92	148 return list;
c@92	149 }
c@92	150
c@92	151 float
c@92	152 AdaptiveSpectrogram::getParameter(std::string id) const
c@92	153 {
c@92	154 if (id == "n") return m_n+1;
c@92	155 else if (id == "w") return m_w+1;
c@92	156 return 0.f;
c@92	157 }
c@92	158
c@92	159 void
c@92	160 AdaptiveSpectrogram::setParameter(std::string id, float value)
c@92	161 {
c@92	162 if (id == "n") {
c@92	163 int n = lrintf(value);
c@92	164 if (n >= 1 && n <= 10) m_n = n-1;
c@92	165 } else if (id == "w") {
c@92	166 int w = lrintf(value);
c@92	167 if (w >= 1 && w <= 14) m_w = w-1;
c@92	168 }
c@92	169 }
c@92	170
c@92	171 AdaptiveSpectrogram::OutputList
c@92	172 AdaptiveSpectrogram::getOutputDescriptors() const
c@92	173 {
c@92	174 OutputList list;
c@92	175
c@92	176 OutputDescriptor d;
c@92	177 d.identifier = "output";
c@92	178 d.name = "Output";
c@92	179 d.description = "The output of the plugin";
c@92	180 d.unit = "";
c@92	181 d.hasFixedBinCount = true;
c@92	182 d.binCount = ((2 << m_w) << m_n) / 2;
c@92	183 d.hasKnownExtents = false;
c@92	184 d.isQuantized = false;
c@92	185 d.sampleType = OutputDescriptor::FixedSampleRate;
c@92	186 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
c@92	187 d.hasDuration = false;
c@92	188 list.push_back(d);
c@92	189
c@92	190 return list;
c@92	191 }
c@92	192
c@92	193 AdaptiveSpectrogram::FeatureSet
c@92	194 AdaptiveSpectrogram::getRemainingFeatures()
c@92	195 {
c@92	196 FeatureSet fs;
c@92	197 return fs;
c@92	198 }
c@92	199
c@100	200 #ifdef NOT_DEFINED
c@92	201 AdaptiveSpectrogram::FeatureSet
c@92	202 AdaptiveSpectrogram::process(const float const inputBuffers, RealTime ts)
c@92	203 {
c@92	204 FeatureSet fs;
c@92	205
c@92	206 int wid = (2 << m_w), WID = ((2 << m_w) << m_n);
c@92	207 int Res = log2(WID/wid)+1;
c@92	208 double *specs = new double [Res];
c@92	209 int Wid = WID;
c@92	210 int wi = 0;
c@92	211
c@99	212 #ifdef DEBUG_VERBOSE
c@92	213 cerr << "wid = " << wid << ", WID = " << WID << endl;
c@99	214 #endif
c@92	215
c@92	216 double *tmpin = new double[WID];
c@92	217 double *tmprout = new double[WID];
c@92	218 double *tmpiout = new double[WID];
c@92	219
c@92	220 while (Wid >= wid) {
c@92	221 specs[wi] = new double *[WID/Wid];
c@92	222 for (int i = 0; i < WID/Wid; ++i) {
c@92	223 specs[wi][i] = new double[Wid/2];
c@92	224 int origin = WID/4 - Wid/4; // for 50% overlap
c@92	225 for (int j = 0; j < Wid; ++j) {
c@92	226 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid);
c@92	227 tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul;
c@92	228 }
c@92	229 FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout);
c@92	230 for (int j = 0; j < Wid/2; ++j) {
c@99	231 int k = j+1; // include Nyquist but not DC
c@99	232 double mag = sqrt(tmprout[k] * tmprout[k] +
c@99	233 tmpiout[k] * tmpiout[k]);
c@99	234 double scaled = mag / (Wid/2);
c@99	235 // double power = scaled*scaled;
c@99	236 // if (k < Wid/2) power = power*2;
c@99	237 specs[wi][i][j] = scaled;
c@92	238 }
c@92	239 }
c@92	240 Wid /= 2;
c@92	241 ++wi;
c@92	242 }
c@92	243
c@99	244 /*
c@99	245 while (Wid >= wid) {
c@99	246 specs[wi] = new double *[WID/Wid];
c@99	247 cerr << "filling width " << Wid << endl;
c@99	248 for (int i = 0; i < WID/Wid; ++i) {
c@99	249 specs[wi][i] = new double[Wid/2];
c@99	250 for (int j = 0; j < Wid/2; ++j) {
c@99	251
c@99	252 specs[wi][i][j] = 0;
c@99	253 int x0 = i * Wid/2;
c@99	254 int x1 = (i+1) * Wid/2 - 1;
c@99	255 int y0 = j * (WID/Wid);
c@99	256 int y1 = (j+1) * (WID/Wid) - 1;
c@99	257
c@99	258 cerr << "box at " << i << "," << j << " covers [" << x0 << "," << y0 << "] to [" << x1 << "," << y1 << "]" << endl;
c@99	259
c@99	260 for (int y = WID/4; y < WID/2; ++y) {
c@99	261 for (int x = WID/4-2; x < WID/4; ++x) {
c@99	262 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
c@99	263 ++specs[wi][i][j];
c@99	264 }
c@99	265 }
c@99	266 }
c@99	267
c@99	268 for (int x = 0; x < WID/2; ++x) {
c@99	269 int y = 0;
c@99	270 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
c@99	271 ++specs[wi][i][j];
c@99	272 }
c@99	273 }
c@99	274 }
c@99	275 }
c@99	276 cerr << "this spectrogram:" << endl;
c@99	277 for (int j = Wid/2-1; j >= 0; --j) {
c@99	278 for (int i = 0; i < WID/Wid; ++i) {
c@99	279 cerr << specs[wi][i][j] << " ";
c@99	280 }
c@99	281 cerr << endl;
c@99	282 }
c@99	283 Wid /= 2;
c@99	284 ++wi;
c@99	285 }
c@99	286 */
c@99	287
c@92	288 int *spl = new int[WID/2];
c@92	289 double *spec = new double[WID/2];
c@92	290
c@92	291 // This prefill makes it easy to see which elements are actually
c@92	292 // set by the MixSpectrogramBlock2 call. Turns out that, with
c@92	293 // 1024, 2048 and 4096 as our widths, the spl array has elements
c@92	294 // 0-4094 (incl) filled in and the spec array has elements 0-4095
c@92	295
c@92	296 for (int i = 0; i < WID/2; ++i) {
c@92	297 spl[i] = i;
c@92	298 spec[i] = i;
c@92	299 }
c@92	300
c@92	301 MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false);
c@92	302
c@92	303 Wid = WID;
c@92	304 wi = 0;
c@92	305 while (Wid >= wid) {
c@92	306 for (int i = 0; i < WID/Wid; ++i) {
c@92	307 delete[] specs[wi][i];
c@92	308 }
c@92	309 delete[] specs[wi];
c@92	310 Wid /= 2;
c@92	311 ++wi;
c@92	312 }
c@92	313 delete[] specs;
c@92	314
c@99	315 #ifdef DEBUG_VERBOSE
c@92	316 std::cerr << "Results at " << ts << ":" << std::endl;
c@99	317 for (int i = 0; i < WID/2; ++i) {
c@99	318 // if (spl[i] == i \|\| spec[i] == i) {
c@99	319 // std::cerr << "\n***\n";
c@99	320 // }
c@92	321 std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " ";
c@92	322 }
c@92	323 std::cerr << std::endl;
c@99	324 #endif
c@99	325
c@92	326 vector<vector<float> > rmat(WID/wid);
c@92	327 for (int i = 0; i < WID/wid; ++i) {
c@92	328 rmat[i] = vector<float>(WID/2);
c@92	329 }
c@92	330
c@92	331 int y = 0, h = WID/2;
c@92	332 int x = 0, w = WID/wid;
c@92	333 unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID);
c@92	334
c@92	335 delete[] spec;
c@92	336 delete[] spl;
c@92	337
c@92	338 for (int i = 0; i < rmat.size(); ++i) {
c@92	339 Feature f;
c@92	340 f.hasTimestamp = false;
c@92	341 f.values = rmat[i];
c@92	342 fs[0].push_back(f);
c@92	343 }
c@92	344
c@92	345 /*
c@92	346 if (m_stepSize == 0) {
c@92	347 cerr << "ERROR: AdaptiveSpectrogram::process: "
c@92	348 << "AdaptiveSpectrogram has not been initialised"
c@92	349 << endl;
c@92	350 return fs;
c@92	351 }
c@92	352 */
c@92	353 return fs;
c@92	354 }
c@100	355 #endif
c@92	356
c@100	357 AdaptiveSpectrogram::FeatureSet
c@100	358 AdaptiveSpectrogram::process(const float const inputBuffers, RealTime ts)
c@100	359 {
c@100	360 FeatureSet fs;
c@100	361
c@100	362 int minwid = (2 << m_w), maxwid = ((2 << m_w) << m_n);
c@100	363
c@100	364 cerr << "widths from " << minwid << " to " << maxwid << " ("
c@100	365 << minwid/2 << " to " << maxwid/2 << " in real parts)" << endl;
c@100	366
c@100	367 Spectrograms s(minwid/2, maxwid/2, 1);
c@100	368
c@100	369 double *tmpin = new double[maxwid];
c@100	370 double *tmprout = new double[maxwid];
c@100	371 double *tmpiout = new double[maxwid];
c@100	372
c@100	373 int w = minwid;
c@100	374 int index = 0;
c@100	375
c@100	376 while (w <= maxwid) {
c@100	377 for (int i = 0; i < maxwid / w; ++i) {
c@100	378 int origin = maxwid/4 - w/4; // for 50% overlap
c@100	379 for (int j = 0; j < w; ++j) {
c@100	380 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / w);
c@100	381 tmpin[j] = inputBuffers[0][origin + i * w/2 + j] * mul;
c@100	382 }
c@100	383 FFT::process(w, false, tmpin, 0, tmprout, tmpiout);
c@100	384 for (int j = 0; j < w/2; ++j) {
c@100	385 int k = j+1; // include Nyquist but not DC
c@100	386 double mag = sqrt(tmprout[k] * tmprout[k] +
c@100	387 tmpiout[k] * tmpiout[k]);
c@100	388 double scaled = mag / (w/2);
c@100	389 s.spectrograms[index]->data[i][j] = scaled;
c@100	390 }
c@100	391 }
c@100	392 w *= 2;
c@100	393 ++index;
c@100	394 }
c@100	395
c@100	396 Cutting *cutting = cut(s, maxwid/2, 0, 0, maxwid/2);
c@100	397
c@100	398 printCutting(cutting, " ");
c@100	399
c@100	400 vector<vector<float> > rmat(maxwid/minwid);
c@100	401 for (int i = 0; i < maxwid/minwid; ++i) {
c@100	402 rmat[i] = vector<float>(maxwid/2);
c@100	403 }
c@100	404
c@100	405 assemble(s, cutting, rmat, 0, 0, maxwid/minwid, maxwid/2);
c@100	406
c@100	407 delete cutting;
c@100	408
c@100	409 for (int i = 0; i < rmat.size(); ++i) {
c@100	410 Feature f;
c@100	411 f.hasTimestamp = false;
c@100	412 f.values = rmat[i];
c@100	413 fs[0].push_back(f);
c@100	414 }
c@100	415
c@100	416 return fs;
c@100	417 }
c@100	418
c@100	419 void
c@100	420 AdaptiveSpectrogram::printCutting(Cutting *c, string pfx)
c@100	421 {
c@100	422 if (c->first) {
c@100	423 if (c->cut == Cutting::Horizontal) {
c@100	424 cerr << pfx << "H" << endl;
c@100	425 } else if (c->cut == Cutting::Vertical) {
c@100	426 cerr << pfx << "V" << endl;
c@100	427 }
c@100	428 printCutting(c->first, pfx + " ");
c@100	429 printCutting(c->second, pfx + " ");
c@100	430 } else {
c@100	431 cerr << pfx << "* " << c->value << endl;
c@100	432 }
c@100	433 }
c@100	434
c@100	435 AdaptiveSpectrogram::Cutting *
c@100	436 AdaptiveSpectrogram::cut(const Spectrograms &s,
c@100	437 int res,
c@100	438 int x, int y, int h)
c@100	439 {
c@100	440 // cerr << "res = " << res << ", x = " << x << ", y = " << y << ", h = " << h << endl;
c@100	441
c@100	442 if (h > 1 && res > s.minres) {
c@100	443
c@100	444 // The "vertical" division is a top/bottom split.
c@100	445 // Splitting this way keeps us in the same resolution,
c@100	446 // but with two vertical subregions of height h/2.
c@100	447
c@100	448 Cutting *top = cut(s, res, x, y + h/2, h/2);
c@100	449 Cutting *bottom = cut(s, res, x, y, h/2);
c@100	450
c@100	451 // The "horizontal" division is a left/right split. Splitting
c@100	452 // this way places us in resolution res/2, which has lower
c@100	453 // vertical resolution but higher horizontal resolution. We
c@100	454 // need to double x accordingly.
c@100	455
c@100	456 Cutting left = cut(s, res/2, 2 x, y/2, h/2);
c@100	457 Cutting right = cut(s, res/2, 2 x + 1, y/2, h/2);
c@100	458
c@100	459 double vcost = top->cost + bottom->cost;
c@100	460 double hcost = left->cost + right->cost;
c@100	461
c@100	462 if (vcost > hcost) {
c@100	463
c@100	464 // cut horizontally (left/right)
c@100	465
c@100	466 Cutting *cutting = new Cutting;
c@100	467 cutting->cut = Cutting::Horizontal;
c@100	468 cutting->first = left;
c@100	469 cutting->second = right;
c@100	470 cutting->cost = hcost;
c@100	471 cutting->value = left->value + right->value;
c@100	472 delete top;
c@100	473 delete bottom;
c@100	474 return cutting;
c@100	475
c@100	476 } else {
c@100	477
c@100	478 Cutting *cutting = new Cutting;
c@100	479 cutting->cut = Cutting::Vertical;
c@100	480 cutting->first = top;
c@100	481 cutting->second = bottom;
c@100	482 cutting->cost = vcost;
c@100	483 cutting->value = top->value + bottom->value;
c@100	484 delete left;
c@100	485 delete right;
c@100	486 return cutting;
c@100	487 }
c@100	488
c@100	489 } else {
c@100	490
c@100	491 // no cuts possible from this level
c@100	492
c@100	493 Cutting *cutting = new Cutting;
c@100	494 cutting->cut = Cutting::Finished;
c@100	495 cutting->first = 0;
c@100	496 cutting->second = 0;
c@100	497
c@100	498 int n = 0;
c@100	499 for (int r = res; r > s.minres; r /= 2) ++n;
c@100	500 const Spectrogram *spectrogram = s.spectrograms[n];
c@100	501
c@100	502 cutting->cost = cost(*spectrogram, x, y);
c@100	503 cutting->value = value(*spectrogram, x, y);
c@100	504
c@100	505 // cerr << "cost for this cell: " << cutting->cost << endl;
c@100	506
c@100	507 return cutting;
c@100	508 }
c@100	509 }
c@100	510
c@100	511 void
c@100	512 AdaptiveSpectrogram::assemble(const Spectrograms &s,
c@100	513 const Cutting *cutting,
c@100	514 vector<vector<float> > &rmat,
c@100	515 int x, int y, int w, int h)
c@100	516 {
c@100	517 switch (cutting->cut) {
c@100	518
c@100	519 case Cutting::Finished:
c@100	520 for (int i = 0; i < w; ++i) {
c@100	521 for (int j = 0; j < h; ++j) {
c@100	522 rmat[x+i][y+j] = cutting->value;
c@100	523 }
c@100	524 }
c@100	525 return;
c@100	526
c@100	527 case Cutting::Horizontal:
c@100	528 assemble(s, cutting->first, rmat, x, y, w/2, h);
c@100	529 assemble(s, cutting->second, rmat, x+w/2, y, w/2, h);
c@100	530 break;
c@100	531
c@100	532 case Cutting::Vertical:
c@100	533 assemble(s, cutting->first, rmat, x, y+h/2, w, h/2);
c@100	534 assemble(s, cutting->second, rmat, x, y, w, h/2);
c@100	535 break;
c@100	536 }
c@100	537 }
c@100	538
c@92	539 void
c@92	540 AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat,
c@92	541 int x, int y, int w, int h,
c@92	542 int *spl,
c@92	543 double *spec, int sz,
c@92	544 int res
c@92	545 )
c@92	546 {
c@92	547
c@99	548 #ifdef DEBUG_VERBOSE
c@92	549 cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h
c@92	550 << ", sz = " << sz << ", spl = " << spl << ", spec = " << spec << ", res = " << res << endl;
c@99	551 #endif
c@92	552
c@92	553 if (sz <= 1) {
c@92	554
c@92	555 for (int i = 0; i < w; ++i) {
c@92	556 for (int j = 0; j < h; ++j) {
c@92	557 if (rmat[x+i][y+j] != 0) {
c@92	558 cerr << "WARNING: Overwriting value " << rmat[x+i][y+j]
c@92	559 << " with " << res + i + j << " at " << x+i << "," << y+j << endl;
c@92	560 }
c@92	561 rmat[x+i][y+j] = *spec;
c@92	562 }
c@92	563 }
c@92	564
c@92	565 return;
c@92	566 }
c@92	567
c@92	568 if (*spl == 0) {
c@92	569
c@92	570 unpackResultMatrix(rmat,
c@92	571 x, y,
c@92	572 w, h/2,
c@92	573 spl + 1,
c@92	574 spec,
c@92	575 sz/2,
c@92	576 res);
c@92	577
c@92	578 unpackResultMatrix(rmat,
c@92	579 x, y + h/2,
c@92	580 w, h/2,
c@92	581 spl + sz/2,
c@92	582 spec + sz/2,
c@92	583 sz/2,
c@92	584 res);
c@92	585
c@92	586 } else if (*spl == 1) {
c@92	587
c@92	588 unpackResultMatrix(rmat,
c@92	589 x, y,
c@92	590 w/2, h,
c@92	591 spl + 1,
c@92	592 spec,
c@92	593 sz/2,
c@92	594 res/2);
c@92	595
c@92	596 unpackResultMatrix(rmat,
c@92	597 x + w/2, y,
c@92	598 w/2, h,
c@92	599 spl + sz/2,
c@92	600 spec + sz/2,
c@92	601 sz/2,
c@92	602 res/2);
c@92	603
c@92	604 } else {
c@92	605 cerr << "ERROR: spl = " << spl << endl;
c@92	606 }
c@92	607 }
c@92	608
c@92	609 //spl[Y-1]
c@92	610 //Specs[R0][x0:x0+x-1][Y0:Y0+Y-1]
c@92	611 //Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1]
c@92	612 //...
c@92	613 //Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1]
c@92	614 //N=WID/wid
c@92	615
c@92	616 /**
c@92	617 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl.
c@92	618 */
c@92	619 double
c@92	620 AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0,
c@99	621 int x0, int Y0, int N, double& ene, string pfx)
c@92	622 {
c@92	623 double ent = 0;
c@92	624
c@99	625 #ifdef DEBUG_VERBOSE
c@99	626 cerr << pfx << "cutting with Y = " << Y << ", R0 = " << R0 << ", x0 = " << x0 << ", Y0 = " << Y0 << ", N = " << N << endl;
c@99	627 #endif
c@99	628
c@92	629 if (Y > N) {
c@92	630
c@99	631 #ifdef DEBUG_VERBOSE
c@99	632 cerr << pfx << "Y > N case, making top/bottom cut" << endl;
c@99	633 #endif
c@99	634
c@92	635 spl[0] = 0;
c@92	636 double ene1, ene2;
c@92	637
c@92	638 ent += DoCutSpectrogramBlock2
c@99	639 (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1, pfx + " ");
c@92	640
c@92	641 ent += DoCutSpectrogramBlock2
c@99	642 (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2, pfx + " ");
c@92	643
c@92	644 ene = ene1+ene2;
c@92	645
c@92	646 } else if (N == 1) {
c@92	647
c@92	648 double tmp = Specs[R0][x0][Y0];
c@99	649
c@99	650 #ifdef DEBUG_VERBOSE
c@99	651 cerr << pfx << "N == 1 case (value here = " << tmp << ")" << endl;
c@99	652 #endif
c@99	653
c@92	654 ene = tmp;
c@92	655 ent = xlogx(tmp);
c@92	656
c@92	657 } else {
c@92	658 // Y == N, the square case
c@92	659
c@99	660 #ifdef DEBUG_VERBOSE
c@99	661 cerr << pfx << "Y == N case, testing left/right cut" << endl;
c@99	662 #endif
c@99	663
c@92	664 double enel, ener, enet, eneb, entl, entr, entt, entb;
c@92	665 int* tmpspl = new int[Y];
c@92	666
c@92	667 entl = DoCutSpectrogramBlock2
c@99	668 (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel, pfx + " ");
c@92	669
c@92	670 entr = DoCutSpectrogramBlock2
c@99	671 (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener, pfx + " ");
c@99	672
c@99	673 #ifdef DEBUG_VERBOSE
c@99	674 cerr << pfx << "Y == N case, testing top/bottom cut" << endl;
c@99	675 #endif
c@92	676
c@92	677 entb = DoCutSpectrogramBlock2
c@99	678 (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb, pfx + " ");
c@92	679
c@92	680 entt = DoCutSpectrogramBlock2
c@99	681 (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet, pfx + " ");
c@92	682
c@92	683 double
c@92	684 ene0 = enet + eneb,
c@92	685 ene1 = enel + ener,
c@92	686 ent0 = entt + entb,
c@92	687 ent1 = entl + entr;
c@92	688
c@92	689 // normalize
c@92	690
c@92	691 double eneres = 1 - (ene0+ene1)/2, norment0, norment1;
c@92	692 double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres);
c@92	693
c@92	694 // quasi-global normalization
c@92	695
c@99	696 // norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres);
c@99	697 // norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres);
c@99	698 norment0 = ene0;
c@99	699 norment1 = ene1;
c@92	700
c@92	701 // local normalization
c@92	702
c@92	703 if (norment1 < norment0) {
c@99	704 #ifdef DEBUG_VERBOSE
c@99	705 cerr << pfx << "top/bottom cut wins (" << norment0 << " > " << norment1 << "), returning sum ent " << ent0 << " and ene " << ene0 << endl;
c@99	706 #endif
c@92	707 spl[0] = 0;
c@92	708 ent = ent0, ene = ene0;
c@92	709 memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2));
c@92	710 } else {
c@99	711 #ifdef DEBUG_VERBOSE
c@99	712 cerr << pfx << "left/right cut wins (" << norment1 << " >= " << norment0 << "), returning sum ent " << ent1 << " and ene " << ene1 << endl;
c@99	713 #endif
c@92	714 spl[0] = 1;
c@92	715 ent = ent1, ene = ene1;
c@92	716 }
c@92	717 }
c@92	718 return ent;
c@92	719 }
c@92	720
c@92	721 /**
c@92	722 * DoMixSpectrogramBlock2 collects values from the multiple
c@92	723 * spectrograms Specs into a linear array Spec.
c@92	724 */
c@92	725 double
c@92	726 AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y,
c@92	727 int R0, int x0, int Y0, bool normmix, int res,
c@92	728 double* e)
c@92	729 {
c@92	730 if (Y == 1) {
c@92	731
c@92	732 Spec[0] = Specs[R0][x0][Y0]*e[0];
c@92	733
c@92	734 } else {
c@92	735
c@92	736 double le[32];
c@92	737
c@92	738 if (normmix && Y < (1<<res)) {
c@92	739
c@92	740 for (int i = 0, j = 1, k = Y;
c@92	741 i < res;
c@92	742 i++, j *= 2, k /= 2) {
c@92	743
c@92	744 double lle = 0;
c@92	745
c@92	746 for (int fr = 0; fr < j; fr++) {
c@92	747 for (int n = 0; n < k; n++) {
c@92	748 lle +=
c@92	749 Specs[i+R0][x0+fr][Y0+n] *
c@92	750 Specs[i+R0][x0+fr][Y0+n];
c@92	751 }
c@92	752 }
c@92	753
c@92	754 lle = sqrt(lle)*e[i];
c@92	755
c@92	756 if (i == 0) {
c@92	757 le[0] = lle;
c@92	758 } else if (lle > le[0]*2) {
c@92	759 le[i] = e[i]le[0]2/lle;
c@92	760 } else {
c@92	761 le[i] = e[i];
c@92	762 }
c@92	763 }
c@92	764
c@92	765 le[0] = e[0];
c@92	766
c@92	767 } else {
c@92	768
c@92	769 memcpy(le, e, sizeof(double)*res);
c@92	770 }
c@92	771
c@92	772 if (spl[0] == 0) {
c@92	773
c@92	774 int newres;
c@92	775 if (Y >= (1<<res)) newres = res;
c@92	776 else newres = res-1;
c@92	777
c@92	778 DoMixSpectrogramBlock2
c@92	779 (&spl[1], Spec, Specs, Y/2, R0, x0, Y0,
c@92	780 normmix, newres, le);
c@92	781
c@92	782 DoMixSpectrogramBlock2
c@92	783 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2,
c@92	784 normmix, newres, le);
c@92	785
c@92	786 } else {
c@92	787
c@92	788 DoMixSpectrogramBlock2
c@92	789 (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2,
c@92	790 normmix, res-1, &le[1]);
c@92	791
c@92	792 DoMixSpectrogramBlock2
c@92	793 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2,
c@92	794 normmix, res-1, &le[1]);
c@92	795 }
c@92	796 }
c@92	797
c@92	798 return 0;
c@92	799 }
c@92	800
c@92	801 /**
c@92	802 * MixSpectrogramBlock2 calls the two Do...() to do the real work.
c@92	803 *
c@92	804 * At this point:
c@92	805 * spl is... what? the returned "cutting", organised how?
c@92	806 * Spec is... what? the returned spectrogram, organised how?
c@92	807 * Specs is an array of input spectrograms
c@92	808 * WID is the maximum window size
c@92	809 * wid is the minimum window size
c@92	810 * normmix is... what?
c@92	811 */
c@92	812 double
c@92	813 AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int
c@92	814 WID, int wid, bool normmix)
c@92	815 {
c@92	816 double ene[32];
c@92	817
c@92	818 // find the total energy and normalize
c@92	819
c@92	820 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
c@92	821
c@92	822 double lene = 0;
c@92	823
c@92	824 for (int fr = 0; fr < Fr; fr++) {
c@92	825 for (int k = 0; k < Wid; k++) {
c@92	826 lene += Specs[i][fr][k]*Specs[i][fr][k];
c@92	827 }
c@92	828 }
c@92	829
c@92	830 ene[i] = lene;
c@92	831
c@92	832 if (lene != 0) {
c@92	833 double ilene = 1.0/lene;
c@92	834 for (int fr = 0; fr < Fr; fr++) {
c@92	835 for (int k = 0; k < Wid; k++) {
c@92	836 Specs[i][fr][k] = Specs[i][fr][k]Specs[i][fr][k]ilene;
c@92	837 }
c@92	838 }
c@92	839 }
c@92	840 }
c@92	841
c@92	842
c@92	843 double result = DoCutSpectrogramBlock2
c@92	844 (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]);
c@92	845
c@92	846 // de-normalize
c@92	847
c@92	848 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
c@92	849 double lene = ene[i];
c@92	850 if (lene != 0) {
c@92	851 for (int fr = 0; fr < Fr; fr++) {
c@92	852 for (int k = 0; k < Wid; k++) {
c@92	853 Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene);
c@92	854 }
c@92	855 }
c@92	856 }
c@92	857 }
c@92	858
c@92	859 double e[32];
c@92	860 for (int i = 0; i < 32; i++) e[i] = 1;
c@92	861
c@92	862 DoMixSpectrogramBlock2
c@92	863 (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e);
c@92	864
c@92	865 return result;
c@92	866 }
c@92	867
c@92	868 /**
c@92	869 * MixSpectrogram2 does the work for Fr frames (the largest frame),
c@92	870 * which basically calls MixSpectrogramBlock2 Fr times.
c@92	871 *
c@92	872 * the 3-D array Specs is the multiple spectrograms calculated with
c@92	873 * window sizes between wid and WID, Specs[0] is the 0th spectrogram,
c@92	874 * etc.
c@92	875 *
c@92	876 * spl and Spec for all frames are returned by MixSpectrogram2, each
c@92	877 * as a 2-D array.
c@92	878 */
c@92	879 double
c@92	880 AdaptiveSpectrogram::MixSpectrogram2(int spl, double Spec, double*** Specs, int Fr,
c@92	881 int WID, int wid, bool norm, bool normmix)
c@92	882 {
c@92	883 // totally Fr frames of WID samples
c@92	884 // each frame is divided into wid VERTICAL parts
c@92	885
c@92	886 int Res = log2(WID/wid)+1;
c@92	887 double* lSpecs = new double[Res];
c@92	888
c@92	889 for (int i = 0; i < Fr; i++) {
c@92	890
c@92	891 for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) {
c@92	892 lSpecs[j] = &Specs[j][i*nfr];
c@92	893 }
c@92	894
c@92	895 MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm);
c@92	896 }
c@92	897
c@92	898 delete[] lSpecs;
c@92	899 return 0;
c@92	900 }
c@92	901

Mercurial > hg > qm-vamp-plugins

annotate plugins/AdaptiveSpectrogram.cpp @ 100:bae940a2ff18