Mercurial > hg > qm-vamp-plugins
comparison plugins/AdaptiveSpectrogram.cpp @ 92:3602e755b696
* Add the Adaptive Spectrogram plugin -- but it isn't working correctly
yet. Also, when it does work, it will need to be refactored out into
the qm-dsp library
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 27 Feb 2009 10:45:10 +0000 |
parents | |
children | 385bec9df059 |
comparison
equal
deleted
inserted
replaced
91:93f7edb0564b | 92:3602e755b696 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 QM Vamp Plugin Set | |
5 | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 All rights reserved. | |
8 */ | |
9 | |
10 #include "AdaptiveSpectrogram.h" | |
11 | |
12 #include <cstdlib> | |
13 #include <cstring> | |
14 | |
15 #include <iostream> | |
16 | |
17 #include <dsp/transforms/FFT.h> | |
18 | |
19 using std::string; | |
20 using std::vector; | |
21 using std::cerr; | |
22 using std::endl; | |
23 | |
24 using Vamp::RealTime; | |
25 | |
26 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) : | |
27 Plugin(inputSampleRate), | |
28 m_w(9), | |
29 m_n(2) | |
30 { | |
31 } | |
32 | |
33 AdaptiveSpectrogram::~AdaptiveSpectrogram() | |
34 { | |
35 } | |
36 | |
37 string | |
38 AdaptiveSpectrogram::getIdentifier() const | |
39 { | |
40 return "adaptivespectrogram"; | |
41 } | |
42 | |
43 string | |
44 AdaptiveSpectrogram::getName() const | |
45 { | |
46 return "Adaptive Spectrogram"; | |
47 } | |
48 | |
49 string | |
50 AdaptiveSpectrogram::getDescription() const | |
51 { | |
52 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions"; | |
53 } | |
54 | |
55 string | |
56 AdaptiveSpectrogram::getMaker() const | |
57 { | |
58 return "Queen Mary, University of London"; | |
59 } | |
60 | |
61 int | |
62 AdaptiveSpectrogram::getPluginVersion() const | |
63 { | |
64 return 1; | |
65 } | |
66 | |
67 string | |
68 AdaptiveSpectrogram::getCopyright() const | |
69 { | |
70 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved"; | |
71 } | |
72 | |
73 size_t | |
74 AdaptiveSpectrogram::getPreferredStepSize() const | |
75 { | |
76 return ((2 << m_w) << m_n) / 2; | |
77 } | |
78 | |
79 size_t | |
80 AdaptiveSpectrogram::getPreferredBlockSize() const | |
81 { | |
82 return (2 << m_w) << m_n; | |
83 } | |
84 | |
85 bool | |
86 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
87 { | |
88 if (channels < getMinChannelCount() || | |
89 channels > getMaxChannelCount()) return false; | |
90 | |
91 return true; | |
92 } | |
93 | |
94 void | |
95 AdaptiveSpectrogram::reset() | |
96 { | |
97 | |
98 } | |
99 | |
100 AdaptiveSpectrogram::ParameterList | |
101 AdaptiveSpectrogram::getParameterDescriptors() const | |
102 { | |
103 ParameterList list; | |
104 | |
105 ParameterDescriptor desc; | |
106 desc.identifier = "n"; | |
107 desc.name = "Number of resolutions"; | |
108 desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified"; | |
109 desc.unit = ""; | |
110 desc.minValue = 1; | |
111 desc.maxValue = 10; | |
112 desc.defaultValue = 3; | |
113 desc.isQuantized = true; | |
114 desc.quantizeStep = 1; | |
115 list.push_back(desc); | |
116 | |
117 ParameterDescriptor desc2; | |
118 desc2.identifier = "w"; | |
119 desc2.name = "Smallest resolution"; | |
120 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions"; | |
121 desc2.unit = ""; | |
122 desc2.minValue = 1; | |
123 desc2.maxValue = 14; | |
124 desc2.defaultValue = 10; | |
125 desc2.isQuantized = true; | |
126 desc2.quantizeStep = 1; | |
127 // I am so lazy | |
128 desc2.valueNames.push_back("2"); | |
129 desc2.valueNames.push_back("4"); | |
130 desc2.valueNames.push_back("8"); | |
131 desc2.valueNames.push_back("16"); | |
132 desc2.valueNames.push_back("32"); | |
133 desc2.valueNames.push_back("64"); | |
134 desc2.valueNames.push_back("128"); | |
135 desc2.valueNames.push_back("256"); | |
136 desc2.valueNames.push_back("512"); | |
137 desc2.valueNames.push_back("1024"); | |
138 desc2.valueNames.push_back("2048"); | |
139 desc2.valueNames.push_back("4096"); | |
140 desc2.valueNames.push_back("8192"); | |
141 desc2.valueNames.push_back("16384"); | |
142 list.push_back(desc2); | |
143 | |
144 return list; | |
145 } | |
146 | |
147 float | |
148 AdaptiveSpectrogram::getParameter(std::string id) const | |
149 { | |
150 if (id == "n") return m_n+1; | |
151 else if (id == "w") return m_w+1; | |
152 return 0.f; | |
153 } | |
154 | |
155 void | |
156 AdaptiveSpectrogram::setParameter(std::string id, float value) | |
157 { | |
158 if (id == "n") { | |
159 int n = lrintf(value); | |
160 if (n >= 1 && n <= 10) m_n = n-1; | |
161 } else if (id == "w") { | |
162 int w = lrintf(value); | |
163 if (w >= 1 && w <= 14) m_w = w-1; | |
164 } | |
165 } | |
166 | |
167 AdaptiveSpectrogram::OutputList | |
168 AdaptiveSpectrogram::getOutputDescriptors() const | |
169 { | |
170 OutputList list; | |
171 | |
172 OutputDescriptor d; | |
173 d.identifier = "output"; | |
174 d.name = "Output"; | |
175 d.description = "The output of the plugin"; | |
176 d.unit = ""; | |
177 d.hasFixedBinCount = true; | |
178 d.binCount = ((2 << m_w) << m_n) / 2; | |
179 d.hasKnownExtents = false; | |
180 d.isQuantized = false; | |
181 d.sampleType = OutputDescriptor::FixedSampleRate; | |
182 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2); | |
183 d.hasDuration = false; | |
184 list.push_back(d); | |
185 | |
186 return list; | |
187 } | |
188 | |
189 AdaptiveSpectrogram::FeatureSet | |
190 AdaptiveSpectrogram::getRemainingFeatures() | |
191 { | |
192 FeatureSet fs; | |
193 return fs; | |
194 } | |
195 | |
196 AdaptiveSpectrogram::FeatureSet | |
197 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts) | |
198 { | |
199 FeatureSet fs; | |
200 | |
201 int wid = (2 << m_w), WID = ((2 << m_w) << m_n); | |
202 int Res = log2(WID/wid)+1; | |
203 double ***specs = new double **[Res]; | |
204 int Wid = WID; | |
205 int wi = 0; | |
206 | |
207 cerr << "wid = " << wid << ", WID = " << WID << endl; | |
208 | |
209 double *tmpin = new double[WID]; | |
210 double *tmprout = new double[WID]; | |
211 double *tmpiout = new double[WID]; | |
212 | |
213 while (Wid >= wid) { | |
214 specs[wi] = new double *[WID/Wid]; | |
215 for (int i = 0; i < WID/Wid; ++i) { | |
216 specs[wi][i] = new double[Wid/2]; | |
217 int origin = WID/4 - Wid/4; // for 50% overlap | |
218 for (int j = 0; j < Wid; ++j) { | |
219 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid); | |
220 tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul; | |
221 } | |
222 FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout); | |
223 for (int j = 0; j < Wid/2; ++j) { | |
224 double mag = | |
225 tmprout[j] * tmprout[j] + | |
226 tmpiout[j] * tmpiout[j]; | |
227 specs[wi][i][j] = sqrt(mag) / Wid; | |
228 } | |
229 } | |
230 Wid /= 2; | |
231 ++wi; | |
232 } | |
233 | |
234 int *spl = new int[WID/2]; | |
235 double *spec = new double[WID/2]; | |
236 | |
237 // This prefill makes it easy to see which elements are actually | |
238 // set by the MixSpectrogramBlock2 call. Turns out that, with | |
239 // 1024, 2048 and 4096 as our widths, the spl array has elements | |
240 // 0-4094 (incl) filled in and the spec array has elements 0-4095 | |
241 | |
242 for (int i = 0; i < WID/2; ++i) { | |
243 spl[i] = i; | |
244 spec[i] = i; | |
245 } | |
246 | |
247 MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false); | |
248 | |
249 Wid = WID; | |
250 wi = 0; | |
251 while (Wid >= wid) { | |
252 for (int i = 0; i < WID/Wid; ++i) { | |
253 delete[] specs[wi][i]; | |
254 } | |
255 delete[] specs[wi]; | |
256 Wid /= 2; | |
257 ++wi; | |
258 } | |
259 delete[] specs; | |
260 | |
261 std::cerr << "Results at " << ts << ":" << std::endl; | |
262 /* for (int i = 0; i < WID/2; ++i) { | |
263 if (spl[i] == i || spec[i] == i) { | |
264 std::cerr << "\n***\n"; | |
265 } | |
266 std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " "; | |
267 } | |
268 std::cerr << std::endl; | |
269 */ | |
270 vector<vector<float> > rmat(WID/wid); | |
271 for (int i = 0; i < WID/wid; ++i) { | |
272 rmat[i] = vector<float>(WID/2); | |
273 } | |
274 | |
275 int y = 0, h = WID/2; | |
276 int x = 0, w = WID/wid; | |
277 unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID); | |
278 | |
279 delete[] spec; | |
280 delete[] spl; | |
281 | |
282 for (int i = 0; i < rmat.size(); ++i) { | |
283 Feature f; | |
284 f.hasTimestamp = false; | |
285 f.values = rmat[i]; | |
286 fs[0].push_back(f); | |
287 } | |
288 | |
289 /* | |
290 if (m_stepSize == 0) { | |
291 cerr << "ERROR: AdaptiveSpectrogram::process: " | |
292 << "AdaptiveSpectrogram has not been initialised" | |
293 << endl; | |
294 return fs; | |
295 } | |
296 */ | |
297 return fs; | |
298 } | |
299 | |
300 void | |
301 AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat, | |
302 int x, int y, int w, int h, | |
303 int *spl, | |
304 double *spec, int sz, | |
305 int res | |
306 ) | |
307 { | |
308 | |
309 cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h | |
310 << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res << endl; | |
311 | |
312 if (sz <= 1) { | |
313 | |
314 for (int i = 0; i < w; ++i) { | |
315 for (int j = 0; j < h; ++j) { | |
316 // rmat[x+i][y+j] = (off ? 0 : *spec); | |
317 if (rmat[x+i][y+j] != 0) { | |
318 cerr << "WARNING: Overwriting value " << rmat[x+i][y+j] | |
319 << " with " << res + i + j << " at " << x+i << "," << y+j << endl; | |
320 } | |
321 // cerr << "[" << x+i << "][" << y+j << "] <= " << res+i+j << endl; | |
322 rmat[x+i][y+j] = *spec; | |
323 } | |
324 } | |
325 | |
326 // cerr << " (done)" << endl; | |
327 return; | |
328 } | |
329 // cerr << endl; | |
330 | |
331 if (*spl == 0) { | |
332 | |
333 unpackResultMatrix(rmat, | |
334 x, y, | |
335 w, h/2, | |
336 spl + 1, | |
337 spec, | |
338 sz/2, | |
339 res); | |
340 | |
341 unpackResultMatrix(rmat, | |
342 x, y + h/2, | |
343 w, h/2, | |
344 spl + sz/2, | |
345 spec + sz/2, | |
346 sz/2, | |
347 res); | |
348 | |
349 } else if (*spl == 1) { | |
350 | |
351 unpackResultMatrix(rmat, | |
352 x, y, | |
353 w/2, h, | |
354 spl + 1, | |
355 spec, | |
356 sz/2, | |
357 res/2); | |
358 | |
359 unpackResultMatrix(rmat, | |
360 x + w/2, y, | |
361 w/2, h, | |
362 spl + sz/2, | |
363 spec + sz/2, | |
364 sz/2, | |
365 res/2); | |
366 | |
367 } else { | |
368 cerr << "ERROR: *spl = " << *spl << endl; | |
369 } | |
370 } | |
371 | |
372 //spl[Y-1] | |
373 //Specs[R0][x0:x0+x-1][Y0:Y0+Y-1] | |
374 //Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1] | |
375 //... | |
376 //Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1] | |
377 //N=WID/wid | |
378 | |
379 /** | |
380 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl. | |
381 */ | |
382 double | |
383 AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0, | |
384 int x0, int Y0, int N, double& ene) | |
385 { | |
386 double ent = 0; | |
387 | |
388 if (Y > N) { | |
389 | |
390 spl[0] = 0; | |
391 double ene1, ene2; | |
392 | |
393 ent += DoCutSpectrogramBlock2 | |
394 (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1); | |
395 | |
396 ent += DoCutSpectrogramBlock2 | |
397 (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2); | |
398 | |
399 ene = ene1+ene2; | |
400 | |
401 } else if (N == 1) { | |
402 | |
403 double tmp = Specs[R0][x0][Y0]; | |
404 ene = tmp; | |
405 ent = xlogx(tmp); | |
406 | |
407 } else { | |
408 // Y == N, the square case | |
409 | |
410 double enel, ener, enet, eneb, entl, entr, entt, entb; | |
411 int* tmpspl = new int[Y]; | |
412 | |
413 entl = DoCutSpectrogramBlock2 | |
414 (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel); | |
415 | |
416 entr = DoCutSpectrogramBlock2 | |
417 (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener); | |
418 | |
419 entb = DoCutSpectrogramBlock2 | |
420 (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb); | |
421 | |
422 entt = DoCutSpectrogramBlock2 | |
423 (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet); | |
424 | |
425 double | |
426 ene0 = enet + eneb, | |
427 ene1 = enel + ener, | |
428 ent0 = entt + entb, | |
429 ent1 = entl + entr; | |
430 | |
431 // normalize | |
432 | |
433 double eneres = 1 - (ene0+ene1)/2, norment0, norment1; | |
434 double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres); | |
435 | |
436 // quasi-global normalization | |
437 | |
438 norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres); | |
439 norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres); | |
440 | |
441 // local normalization | |
442 | |
443 if (norment1 < norment0) { | |
444 spl[0] = 0; | |
445 ent = ent0, ene = ene0; | |
446 memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2)); | |
447 } else { | |
448 spl[0] = 1; | |
449 ent = ent1, ene = ene1; | |
450 } | |
451 } | |
452 return ent; | |
453 } | |
454 | |
455 /** | |
456 * DoMixSpectrogramBlock2 collects values from the multiple | |
457 * spectrograms Specs into a linear array Spec. | |
458 */ | |
459 double | |
460 AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y, | |
461 int R0, int x0, int Y0, bool normmix, int res, | |
462 double* e) | |
463 { | |
464 if (Y == 1) { | |
465 | |
466 Spec[0] = Specs[R0][x0][Y0]*e[0]; | |
467 | |
468 } else { | |
469 | |
470 double le[32]; | |
471 | |
472 if (normmix && Y < (1<<res)) { | |
473 | |
474 for (int i = 0, j = 1, k = Y; | |
475 i < res; | |
476 i++, j *= 2, k /= 2) { | |
477 | |
478 double lle = 0; | |
479 | |
480 for (int fr = 0; fr < j; fr++) { | |
481 for (int n = 0; n < k; n++) { | |
482 lle += | |
483 Specs[i+R0][x0+fr][Y0+n] * | |
484 Specs[i+R0][x0+fr][Y0+n]; | |
485 } | |
486 } | |
487 | |
488 lle = sqrt(lle)*e[i]; | |
489 | |
490 if (i == 0) { | |
491 le[0] = lle; | |
492 } else if (lle > le[0]*2) { | |
493 le[i] = e[i]*le[0]*2/lle; | |
494 } else { | |
495 le[i] = e[i]; | |
496 } | |
497 } | |
498 | |
499 le[0] = e[0]; | |
500 | |
501 } else { | |
502 | |
503 memcpy(le, e, sizeof(double)*res); | |
504 } | |
505 | |
506 if (spl[0] == 0) { | |
507 | |
508 int newres; | |
509 if (Y >= (1<<res)) newres = res; | |
510 else newres = res-1; | |
511 | |
512 DoMixSpectrogramBlock2 | |
513 (&spl[1], Spec, Specs, Y/2, R0, x0, Y0, | |
514 normmix, newres, le); | |
515 | |
516 DoMixSpectrogramBlock2 | |
517 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, | |
518 normmix, newres, le); | |
519 | |
520 } else { | |
521 | |
522 DoMixSpectrogramBlock2 | |
523 (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2, | |
524 normmix, res-1, &le[1]); | |
525 | |
526 DoMixSpectrogramBlock2 | |
527 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2, | |
528 normmix, res-1, &le[1]); | |
529 } | |
530 } | |
531 | |
532 return 0; | |
533 } | |
534 | |
535 /** | |
536 * MixSpectrogramBlock2 calls the two Do...() to do the real work. | |
537 * | |
538 * At this point: | |
539 * spl is... what? the returned "cutting", organised how? | |
540 * Spec is... what? the returned spectrogram, organised how? | |
541 * Specs is an array of input spectrograms | |
542 * WID is the maximum window size | |
543 * wid is the minimum window size | |
544 * normmix is... what? | |
545 */ | |
546 double | |
547 AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int | |
548 WID, int wid, bool normmix) | |
549 { | |
550 double ene[32]; | |
551 | |
552 // find the total energy and normalize | |
553 | |
554 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) { | |
555 | |
556 double lene = 0; | |
557 | |
558 for (int fr = 0; fr < Fr; fr++) { | |
559 for (int k = 0; k < Wid; k++) { | |
560 lene += Specs[i][fr][k]*Specs[i][fr][k]; | |
561 } | |
562 } | |
563 | |
564 ene[i] = lene; | |
565 | |
566 if (lene != 0) { | |
567 double ilene = 1.0/lene; | |
568 for (int fr = 0; fr < Fr; fr++) { | |
569 for (int k = 0; k < Wid; k++) { | |
570 Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene; | |
571 } | |
572 } | |
573 } | |
574 } | |
575 | |
576 | |
577 double result = DoCutSpectrogramBlock2 | |
578 (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]); | |
579 | |
580 // de-normalize | |
581 | |
582 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) { | |
583 double lene = ene[i]; | |
584 if (lene != 0) { | |
585 for (int fr = 0; fr < Fr; fr++) { | |
586 for (int k = 0; k < Wid; k++) { | |
587 Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene); | |
588 } | |
589 } | |
590 } | |
591 } | |
592 | |
593 double e[32]; | |
594 for (int i = 0; i < 32; i++) e[i] = 1; | |
595 | |
596 DoMixSpectrogramBlock2 | |
597 (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e); | |
598 | |
599 return result; | |
600 } | |
601 | |
602 /** | |
603 * MixSpectrogram2 does the work for Fr frames (the largest frame), | |
604 * which basically calls MixSpectrogramBlock2 Fr times. | |
605 * | |
606 * the 3-D array Specs is the multiple spectrograms calculated with | |
607 * window sizes between wid and WID, Specs[0] is the 0th spectrogram, | |
608 * etc. | |
609 * | |
610 * spl and Spec for all frames are returned by MixSpectrogram2, each | |
611 * as a 2-D array. | |
612 */ | |
613 double | |
614 AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr, | |
615 int WID, int wid, bool norm, bool normmix) | |
616 { | |
617 // totally Fr frames of WID samples | |
618 // each frame is divided into wid VERTICAL parts | |
619 | |
620 int Res = log2(WID/wid)+1; | |
621 double*** lSpecs = new double**[Res]; | |
622 | |
623 for (int i = 0; i < Fr; i++) { | |
624 | |
625 for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) { | |
626 lSpecs[j] = &Specs[j][i*nfr]; | |
627 } | |
628 | |
629 MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm); | |
630 } | |
631 | |
632 delete[] lSpecs; | |
633 return 0; | |
634 } | |
635 |