comparison plugins/AdaptiveSpectrogram.cpp @ 92:3602e755b696

* Add the Adaptive Spectrogram plugin -- but it isn't working correctly yet. Also, when it does work, it will need to be refactored out into the qm-dsp library
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 27 Feb 2009 10:45:10 +0000
parents
children 385bec9df059
comparison
equal deleted inserted replaced
91:93f7edb0564b 92:3602e755b696
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 QM Vamp Plugin Set
5
6 Centre for Digital Music, Queen Mary, University of London.
7 All rights reserved.
8 */
9
10 #include "AdaptiveSpectrogram.h"
11
12 #include <cstdlib>
13 #include <cstring>
14
15 #include <iostream>
16
17 #include <dsp/transforms/FFT.h>
18
19 using std::string;
20 using std::vector;
21 using std::cerr;
22 using std::endl;
23
24 using Vamp::RealTime;
25
26 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
27 Plugin(inputSampleRate),
28 m_w(9),
29 m_n(2)
30 {
31 }
32
33 AdaptiveSpectrogram::~AdaptiveSpectrogram()
34 {
35 }
36
37 string
38 AdaptiveSpectrogram::getIdentifier() const
39 {
40 return "adaptivespectrogram";
41 }
42
43 string
44 AdaptiveSpectrogram::getName() const
45 {
46 return "Adaptive Spectrogram";
47 }
48
49 string
50 AdaptiveSpectrogram::getDescription() const
51 {
52 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
53 }
54
55 string
56 AdaptiveSpectrogram::getMaker() const
57 {
58 return "Queen Mary, University of London";
59 }
60
61 int
62 AdaptiveSpectrogram::getPluginVersion() const
63 {
64 return 1;
65 }
66
67 string
68 AdaptiveSpectrogram::getCopyright() const
69 {
70 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
71 }
72
73 size_t
74 AdaptiveSpectrogram::getPreferredStepSize() const
75 {
76 return ((2 << m_w) << m_n) / 2;
77 }
78
79 size_t
80 AdaptiveSpectrogram::getPreferredBlockSize() const
81 {
82 return (2 << m_w) << m_n;
83 }
84
85 bool
86 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
87 {
88 if (channels < getMinChannelCount() ||
89 channels > getMaxChannelCount()) return false;
90
91 return true;
92 }
93
94 void
95 AdaptiveSpectrogram::reset()
96 {
97
98 }
99
100 AdaptiveSpectrogram::ParameterList
101 AdaptiveSpectrogram::getParameterDescriptors() const
102 {
103 ParameterList list;
104
105 ParameterDescriptor desc;
106 desc.identifier = "n";
107 desc.name = "Number of resolutions";
108 desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified";
109 desc.unit = "";
110 desc.minValue = 1;
111 desc.maxValue = 10;
112 desc.defaultValue = 3;
113 desc.isQuantized = true;
114 desc.quantizeStep = 1;
115 list.push_back(desc);
116
117 ParameterDescriptor desc2;
118 desc2.identifier = "w";
119 desc2.name = "Smallest resolution";
120 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
121 desc2.unit = "";
122 desc2.minValue = 1;
123 desc2.maxValue = 14;
124 desc2.defaultValue = 10;
125 desc2.isQuantized = true;
126 desc2.quantizeStep = 1;
127 // I am so lazy
128 desc2.valueNames.push_back("2");
129 desc2.valueNames.push_back("4");
130 desc2.valueNames.push_back("8");
131 desc2.valueNames.push_back("16");
132 desc2.valueNames.push_back("32");
133 desc2.valueNames.push_back("64");
134 desc2.valueNames.push_back("128");
135 desc2.valueNames.push_back("256");
136 desc2.valueNames.push_back("512");
137 desc2.valueNames.push_back("1024");
138 desc2.valueNames.push_back("2048");
139 desc2.valueNames.push_back("4096");
140 desc2.valueNames.push_back("8192");
141 desc2.valueNames.push_back("16384");
142 list.push_back(desc2);
143
144 return list;
145 }
146
147 float
148 AdaptiveSpectrogram::getParameter(std::string id) const
149 {
150 if (id == "n") return m_n+1;
151 else if (id == "w") return m_w+1;
152 return 0.f;
153 }
154
155 void
156 AdaptiveSpectrogram::setParameter(std::string id, float value)
157 {
158 if (id == "n") {
159 int n = lrintf(value);
160 if (n >= 1 && n <= 10) m_n = n-1;
161 } else if (id == "w") {
162 int w = lrintf(value);
163 if (w >= 1 && w <= 14) m_w = w-1;
164 }
165 }
166
167 AdaptiveSpectrogram::OutputList
168 AdaptiveSpectrogram::getOutputDescriptors() const
169 {
170 OutputList list;
171
172 OutputDescriptor d;
173 d.identifier = "output";
174 d.name = "Output";
175 d.description = "The output of the plugin";
176 d.unit = "";
177 d.hasFixedBinCount = true;
178 d.binCount = ((2 << m_w) << m_n) / 2;
179 d.hasKnownExtents = false;
180 d.isQuantized = false;
181 d.sampleType = OutputDescriptor::FixedSampleRate;
182 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
183 d.hasDuration = false;
184 list.push_back(d);
185
186 return list;
187 }
188
189 AdaptiveSpectrogram::FeatureSet
190 AdaptiveSpectrogram::getRemainingFeatures()
191 {
192 FeatureSet fs;
193 return fs;
194 }
195
196 AdaptiveSpectrogram::FeatureSet
197 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
198 {
199 FeatureSet fs;
200
201 int wid = (2 << m_w), WID = ((2 << m_w) << m_n);
202 int Res = log2(WID/wid)+1;
203 double ***specs = new double **[Res];
204 int Wid = WID;
205 int wi = 0;
206
207 cerr << "wid = " << wid << ", WID = " << WID << endl;
208
209 double *tmpin = new double[WID];
210 double *tmprout = new double[WID];
211 double *tmpiout = new double[WID];
212
213 while (Wid >= wid) {
214 specs[wi] = new double *[WID/Wid];
215 for (int i = 0; i < WID/Wid; ++i) {
216 specs[wi][i] = new double[Wid/2];
217 int origin = WID/4 - Wid/4; // for 50% overlap
218 for (int j = 0; j < Wid; ++j) {
219 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid);
220 tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul;
221 }
222 FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout);
223 for (int j = 0; j < Wid/2; ++j) {
224 double mag =
225 tmprout[j] * tmprout[j] +
226 tmpiout[j] * tmpiout[j];
227 specs[wi][i][j] = sqrt(mag) / Wid;
228 }
229 }
230 Wid /= 2;
231 ++wi;
232 }
233
234 int *spl = new int[WID/2];
235 double *spec = new double[WID/2];
236
237 // This prefill makes it easy to see which elements are actually
238 // set by the MixSpectrogramBlock2 call. Turns out that, with
239 // 1024, 2048 and 4096 as our widths, the spl array has elements
240 // 0-4094 (incl) filled in and the spec array has elements 0-4095
241
242 for (int i = 0; i < WID/2; ++i) {
243 spl[i] = i;
244 spec[i] = i;
245 }
246
247 MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false);
248
249 Wid = WID;
250 wi = 0;
251 while (Wid >= wid) {
252 for (int i = 0; i < WID/Wid; ++i) {
253 delete[] specs[wi][i];
254 }
255 delete[] specs[wi];
256 Wid /= 2;
257 ++wi;
258 }
259 delete[] specs;
260
261 std::cerr << "Results at " << ts << ":" << std::endl;
262 /* for (int i = 0; i < WID/2; ++i) {
263 if (spl[i] == i || spec[i] == i) {
264 std::cerr << "\n***\n";
265 }
266 std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " ";
267 }
268 std::cerr << std::endl;
269 */
270 vector<vector<float> > rmat(WID/wid);
271 for (int i = 0; i < WID/wid; ++i) {
272 rmat[i] = vector<float>(WID/2);
273 }
274
275 int y = 0, h = WID/2;
276 int x = 0, w = WID/wid;
277 unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID);
278
279 delete[] spec;
280 delete[] spl;
281
282 for (int i = 0; i < rmat.size(); ++i) {
283 Feature f;
284 f.hasTimestamp = false;
285 f.values = rmat[i];
286 fs[0].push_back(f);
287 }
288
289 /*
290 if (m_stepSize == 0) {
291 cerr << "ERROR: AdaptiveSpectrogram::process: "
292 << "AdaptiveSpectrogram has not been initialised"
293 << endl;
294 return fs;
295 }
296 */
297 return fs;
298 }
299
300 void
301 AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat,
302 int x, int y, int w, int h,
303 int *spl,
304 double *spec, int sz,
305 int res
306 )
307 {
308
309 cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h
310 << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res << endl;
311
312 if (sz <= 1) {
313
314 for (int i = 0; i < w; ++i) {
315 for (int j = 0; j < h; ++j) {
316 // rmat[x+i][y+j] = (off ? 0 : *spec);
317 if (rmat[x+i][y+j] != 0) {
318 cerr << "WARNING: Overwriting value " << rmat[x+i][y+j]
319 << " with " << res + i + j << " at " << x+i << "," << y+j << endl;
320 }
321 // cerr << "[" << x+i << "][" << y+j << "] <= " << res+i+j << endl;
322 rmat[x+i][y+j] = *spec;
323 }
324 }
325
326 // cerr << " (done)" << endl;
327 return;
328 }
329 // cerr << endl;
330
331 if (*spl == 0) {
332
333 unpackResultMatrix(rmat,
334 x, y,
335 w, h/2,
336 spl + 1,
337 spec,
338 sz/2,
339 res);
340
341 unpackResultMatrix(rmat,
342 x, y + h/2,
343 w, h/2,
344 spl + sz/2,
345 spec + sz/2,
346 sz/2,
347 res);
348
349 } else if (*spl == 1) {
350
351 unpackResultMatrix(rmat,
352 x, y,
353 w/2, h,
354 spl + 1,
355 spec,
356 sz/2,
357 res/2);
358
359 unpackResultMatrix(rmat,
360 x + w/2, y,
361 w/2, h,
362 spl + sz/2,
363 spec + sz/2,
364 sz/2,
365 res/2);
366
367 } else {
368 cerr << "ERROR: *spl = " << *spl << endl;
369 }
370 }
371
372 //spl[Y-1]
373 //Specs[R0][x0:x0+x-1][Y0:Y0+Y-1]
374 //Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1]
375 //...
376 //Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1]
377 //N=WID/wid
378
379 /**
380 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl.
381 */
382 double
383 AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0,
384 int x0, int Y0, int N, double& ene)
385 {
386 double ent = 0;
387
388 if (Y > N) {
389
390 spl[0] = 0;
391 double ene1, ene2;
392
393 ent += DoCutSpectrogramBlock2
394 (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1);
395
396 ent += DoCutSpectrogramBlock2
397 (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2);
398
399 ene = ene1+ene2;
400
401 } else if (N == 1) {
402
403 double tmp = Specs[R0][x0][Y0];
404 ene = tmp;
405 ent = xlogx(tmp);
406
407 } else {
408 // Y == N, the square case
409
410 double enel, ener, enet, eneb, entl, entr, entt, entb;
411 int* tmpspl = new int[Y];
412
413 entl = DoCutSpectrogramBlock2
414 (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel);
415
416 entr = DoCutSpectrogramBlock2
417 (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener);
418
419 entb = DoCutSpectrogramBlock2
420 (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb);
421
422 entt = DoCutSpectrogramBlock2
423 (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet);
424
425 double
426 ene0 = enet + eneb,
427 ene1 = enel + ener,
428 ent0 = entt + entb,
429 ent1 = entl + entr;
430
431 // normalize
432
433 double eneres = 1 - (ene0+ene1)/2, norment0, norment1;
434 double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres);
435
436 // quasi-global normalization
437
438 norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres);
439 norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres);
440
441 // local normalization
442
443 if (norment1 < norment0) {
444 spl[0] = 0;
445 ent = ent0, ene = ene0;
446 memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2));
447 } else {
448 spl[0] = 1;
449 ent = ent1, ene = ene1;
450 }
451 }
452 return ent;
453 }
454
455 /**
456 * DoMixSpectrogramBlock2 collects values from the multiple
457 * spectrograms Specs into a linear array Spec.
458 */
459 double
460 AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y,
461 int R0, int x0, int Y0, bool normmix, int res,
462 double* e)
463 {
464 if (Y == 1) {
465
466 Spec[0] = Specs[R0][x0][Y0]*e[0];
467
468 } else {
469
470 double le[32];
471
472 if (normmix && Y < (1<<res)) {
473
474 for (int i = 0, j = 1, k = Y;
475 i < res;
476 i++, j *= 2, k /= 2) {
477
478 double lle = 0;
479
480 for (int fr = 0; fr < j; fr++) {
481 for (int n = 0; n < k; n++) {
482 lle +=
483 Specs[i+R0][x0+fr][Y0+n] *
484 Specs[i+R0][x0+fr][Y0+n];
485 }
486 }
487
488 lle = sqrt(lle)*e[i];
489
490 if (i == 0) {
491 le[0] = lle;
492 } else if (lle > le[0]*2) {
493 le[i] = e[i]*le[0]*2/lle;
494 } else {
495 le[i] = e[i];
496 }
497 }
498
499 le[0] = e[0];
500
501 } else {
502
503 memcpy(le, e, sizeof(double)*res);
504 }
505
506 if (spl[0] == 0) {
507
508 int newres;
509 if (Y >= (1<<res)) newres = res;
510 else newres = res-1;
511
512 DoMixSpectrogramBlock2
513 (&spl[1], Spec, Specs, Y/2, R0, x0, Y0,
514 normmix, newres, le);
515
516 DoMixSpectrogramBlock2
517 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2,
518 normmix, newres, le);
519
520 } else {
521
522 DoMixSpectrogramBlock2
523 (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2,
524 normmix, res-1, &le[1]);
525
526 DoMixSpectrogramBlock2
527 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2,
528 normmix, res-1, &le[1]);
529 }
530 }
531
532 return 0;
533 }
534
535 /**
536 * MixSpectrogramBlock2 calls the two Do...() to do the real work.
537 *
538 * At this point:
539 * spl is... what? the returned "cutting", organised how?
540 * Spec is... what? the returned spectrogram, organised how?
541 * Specs is an array of input spectrograms
542 * WID is the maximum window size
543 * wid is the minimum window size
544 * normmix is... what?
545 */
546 double
547 AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int
548 WID, int wid, bool normmix)
549 {
550 double ene[32];
551
552 // find the total energy and normalize
553
554 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
555
556 double lene = 0;
557
558 for (int fr = 0; fr < Fr; fr++) {
559 for (int k = 0; k < Wid; k++) {
560 lene += Specs[i][fr][k]*Specs[i][fr][k];
561 }
562 }
563
564 ene[i] = lene;
565
566 if (lene != 0) {
567 double ilene = 1.0/lene;
568 for (int fr = 0; fr < Fr; fr++) {
569 for (int k = 0; k < Wid; k++) {
570 Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene;
571 }
572 }
573 }
574 }
575
576
577 double result = DoCutSpectrogramBlock2
578 (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]);
579
580 // de-normalize
581
582 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
583 double lene = ene[i];
584 if (lene != 0) {
585 for (int fr = 0; fr < Fr; fr++) {
586 for (int k = 0; k < Wid; k++) {
587 Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene);
588 }
589 }
590 }
591 }
592
593 double e[32];
594 for (int i = 0; i < 32; i++) e[i] = 1;
595
596 DoMixSpectrogramBlock2
597 (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e);
598
599 return result;
600 }
601
602 /**
603 * MixSpectrogram2 does the work for Fr frames (the largest frame),
604 * which basically calls MixSpectrogramBlock2 Fr times.
605 *
606 * the 3-D array Specs is the multiple spectrograms calculated with
607 * window sizes between wid and WID, Specs[0] is the 0th spectrogram,
608 * etc.
609 *
610 * spl and Spec for all frames are returned by MixSpectrogram2, each
611 * as a 2-D array.
612 */
613 double
614 AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr,
615 int WID, int wid, bool norm, bool normmix)
616 {
617 // totally Fr frames of WID samples
618 // each frame is divided into wid VERTICAL parts
619
620 int Res = log2(WID/wid)+1;
621 double*** lSpecs = new double**[Res];
622
623 for (int i = 0; i < Fr; i++) {
624
625 for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) {
626 lSpecs[j] = &Specs[j][i*nfr];
627 }
628
629 MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm);
630 }
631
632 delete[] lSpecs;
633 return 0;
634 }
635