c@92
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@92
|
2
|
c@92
|
3 /*
|
c@92
|
4 QM Vamp Plugin Set
|
c@92
|
5
|
c@92
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@92
|
7 All rights reserved.
|
c@92
|
8 */
|
c@92
|
9
|
c@92
|
10 #include "AdaptiveSpectrogram.h"
|
c@92
|
11
|
c@92
|
12 #include <cstdlib>
|
c@92
|
13 #include <cstring>
|
c@92
|
14
|
c@92
|
15 #include <iostream>
|
c@92
|
16
|
c@92
|
17 #include <dsp/transforms/FFT.h>
|
c@92
|
18
|
c@92
|
19 using std::string;
|
c@92
|
20 using std::vector;
|
c@92
|
21 using std::cerr;
|
c@92
|
22 using std::endl;
|
c@92
|
23
|
c@92
|
24 using Vamp::RealTime;
|
c@92
|
25
|
c@99
|
26 //#define DEBUG_VERBOSE 1
|
c@99
|
27
|
c@92
|
28 AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
|
c@92
|
29 Plugin(inputSampleRate),
|
c@92
|
30 m_w(9),
|
c@92
|
31 m_n(2)
|
c@100
|
32 // m_w(0),
|
c@100
|
33 // m_n(2)
|
c@92
|
34 {
|
c@92
|
35 }
|
c@92
|
36
|
c@92
|
37 AdaptiveSpectrogram::~AdaptiveSpectrogram()
|
c@92
|
38 {
|
c@92
|
39 }
|
c@92
|
40
|
c@92
|
41 string
|
c@92
|
42 AdaptiveSpectrogram::getIdentifier() const
|
c@92
|
43 {
|
c@93
|
44 return "qm-adaptivespectrogram";
|
c@92
|
45 }
|
c@92
|
46
|
c@92
|
47 string
|
c@92
|
48 AdaptiveSpectrogram::getName() const
|
c@92
|
49 {
|
c@92
|
50 return "Adaptive Spectrogram";
|
c@92
|
51 }
|
c@92
|
52
|
c@92
|
53 string
|
c@92
|
54 AdaptiveSpectrogram::getDescription() const
|
c@92
|
55 {
|
c@92
|
56 return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
|
c@92
|
57 }
|
c@92
|
58
|
c@92
|
59 string
|
c@92
|
60 AdaptiveSpectrogram::getMaker() const
|
c@92
|
61 {
|
c@92
|
62 return "Queen Mary, University of London";
|
c@92
|
63 }
|
c@92
|
64
|
c@92
|
65 int
|
c@92
|
66 AdaptiveSpectrogram::getPluginVersion() const
|
c@92
|
67 {
|
c@92
|
68 return 1;
|
c@92
|
69 }
|
c@92
|
70
|
c@92
|
71 string
|
c@92
|
72 AdaptiveSpectrogram::getCopyright() const
|
c@92
|
73 {
|
c@92
|
74 return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
|
c@92
|
75 }
|
c@92
|
76
|
c@92
|
77 size_t
|
c@92
|
78 AdaptiveSpectrogram::getPreferredStepSize() const
|
c@92
|
79 {
|
c@92
|
80 return ((2 << m_w) << m_n) / 2;
|
c@92
|
81 }
|
c@92
|
82
|
c@92
|
83 size_t
|
c@92
|
84 AdaptiveSpectrogram::getPreferredBlockSize() const
|
c@92
|
85 {
|
c@92
|
86 return (2 << m_w) << m_n;
|
c@92
|
87 }
|
c@92
|
88
|
c@92
|
89 bool
|
c@92
|
90 AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@92
|
91 {
|
c@92
|
92 if (channels < getMinChannelCount() ||
|
c@92
|
93 channels > getMaxChannelCount()) return false;
|
c@92
|
94
|
c@92
|
95 return true;
|
c@92
|
96 }
|
c@92
|
97
|
c@92
|
98 void
|
c@92
|
99 AdaptiveSpectrogram::reset()
|
c@92
|
100 {
|
c@92
|
101
|
c@92
|
102 }
|
c@92
|
103
|
c@92
|
104 AdaptiveSpectrogram::ParameterList
|
c@92
|
105 AdaptiveSpectrogram::getParameterDescriptors() const
|
c@92
|
106 {
|
c@92
|
107 ParameterList list;
|
c@92
|
108
|
c@92
|
109 ParameterDescriptor desc;
|
c@92
|
110 desc.identifier = "n";
|
c@92
|
111 desc.name = "Number of resolutions";
|
c@92
|
112 desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified";
|
c@92
|
113 desc.unit = "";
|
c@92
|
114 desc.minValue = 1;
|
c@92
|
115 desc.maxValue = 10;
|
c@92
|
116 desc.defaultValue = 3;
|
c@92
|
117 desc.isQuantized = true;
|
c@92
|
118 desc.quantizeStep = 1;
|
c@92
|
119 list.push_back(desc);
|
c@92
|
120
|
c@92
|
121 ParameterDescriptor desc2;
|
c@92
|
122 desc2.identifier = "w";
|
c@92
|
123 desc2.name = "Smallest resolution";
|
c@92
|
124 desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
|
c@92
|
125 desc2.unit = "";
|
c@92
|
126 desc2.minValue = 1;
|
c@92
|
127 desc2.maxValue = 14;
|
c@92
|
128 desc2.defaultValue = 10;
|
c@92
|
129 desc2.isQuantized = true;
|
c@92
|
130 desc2.quantizeStep = 1;
|
c@92
|
131 // I am so lazy
|
c@92
|
132 desc2.valueNames.push_back("2");
|
c@92
|
133 desc2.valueNames.push_back("4");
|
c@92
|
134 desc2.valueNames.push_back("8");
|
c@92
|
135 desc2.valueNames.push_back("16");
|
c@92
|
136 desc2.valueNames.push_back("32");
|
c@92
|
137 desc2.valueNames.push_back("64");
|
c@92
|
138 desc2.valueNames.push_back("128");
|
c@92
|
139 desc2.valueNames.push_back("256");
|
c@92
|
140 desc2.valueNames.push_back("512");
|
c@92
|
141 desc2.valueNames.push_back("1024");
|
c@92
|
142 desc2.valueNames.push_back("2048");
|
c@92
|
143 desc2.valueNames.push_back("4096");
|
c@92
|
144 desc2.valueNames.push_back("8192");
|
c@92
|
145 desc2.valueNames.push_back("16384");
|
c@92
|
146 list.push_back(desc2);
|
c@92
|
147
|
c@92
|
148 return list;
|
c@92
|
149 }
|
c@92
|
150
|
c@92
|
151 float
|
c@92
|
152 AdaptiveSpectrogram::getParameter(std::string id) const
|
c@92
|
153 {
|
c@92
|
154 if (id == "n") return m_n+1;
|
c@92
|
155 else if (id == "w") return m_w+1;
|
c@92
|
156 return 0.f;
|
c@92
|
157 }
|
c@92
|
158
|
c@92
|
159 void
|
c@92
|
160 AdaptiveSpectrogram::setParameter(std::string id, float value)
|
c@92
|
161 {
|
c@92
|
162 if (id == "n") {
|
c@92
|
163 int n = lrintf(value);
|
c@92
|
164 if (n >= 1 && n <= 10) m_n = n-1;
|
c@92
|
165 } else if (id == "w") {
|
c@92
|
166 int w = lrintf(value);
|
c@92
|
167 if (w >= 1 && w <= 14) m_w = w-1;
|
c@92
|
168 }
|
c@92
|
169 }
|
c@92
|
170
|
c@92
|
171 AdaptiveSpectrogram::OutputList
|
c@92
|
172 AdaptiveSpectrogram::getOutputDescriptors() const
|
c@92
|
173 {
|
c@92
|
174 OutputList list;
|
c@92
|
175
|
c@92
|
176 OutputDescriptor d;
|
c@92
|
177 d.identifier = "output";
|
c@92
|
178 d.name = "Output";
|
c@92
|
179 d.description = "The output of the plugin";
|
c@92
|
180 d.unit = "";
|
c@92
|
181 d.hasFixedBinCount = true;
|
c@92
|
182 d.binCount = ((2 << m_w) << m_n) / 2;
|
c@92
|
183 d.hasKnownExtents = false;
|
c@92
|
184 d.isQuantized = false;
|
c@92
|
185 d.sampleType = OutputDescriptor::FixedSampleRate;
|
c@92
|
186 d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
|
c@92
|
187 d.hasDuration = false;
|
c@92
|
188 list.push_back(d);
|
c@92
|
189
|
c@92
|
190 return list;
|
c@92
|
191 }
|
c@92
|
192
|
c@92
|
193 AdaptiveSpectrogram::FeatureSet
|
c@92
|
194 AdaptiveSpectrogram::getRemainingFeatures()
|
c@92
|
195 {
|
c@92
|
196 FeatureSet fs;
|
c@92
|
197 return fs;
|
c@92
|
198 }
|
c@92
|
199
|
c@100
|
200 #ifdef NOT_DEFINED
|
c@92
|
201 AdaptiveSpectrogram::FeatureSet
|
c@92
|
202 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
|
c@92
|
203 {
|
c@92
|
204 FeatureSet fs;
|
c@92
|
205
|
c@92
|
206 int wid = (2 << m_w), WID = ((2 << m_w) << m_n);
|
c@92
|
207 int Res = log2(WID/wid)+1;
|
c@92
|
208 double ***specs = new double **[Res];
|
c@92
|
209 int Wid = WID;
|
c@92
|
210 int wi = 0;
|
c@92
|
211
|
c@99
|
212 #ifdef DEBUG_VERBOSE
|
c@92
|
213 cerr << "wid = " << wid << ", WID = " << WID << endl;
|
c@99
|
214 #endif
|
c@92
|
215
|
c@92
|
216 double *tmpin = new double[WID];
|
c@92
|
217 double *tmprout = new double[WID];
|
c@92
|
218 double *tmpiout = new double[WID];
|
c@92
|
219
|
c@92
|
220 while (Wid >= wid) {
|
c@92
|
221 specs[wi] = new double *[WID/Wid];
|
c@92
|
222 for (int i = 0; i < WID/Wid; ++i) {
|
c@92
|
223 specs[wi][i] = new double[Wid/2];
|
c@92
|
224 int origin = WID/4 - Wid/4; // for 50% overlap
|
c@92
|
225 for (int j = 0; j < Wid; ++j) {
|
c@92
|
226 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid);
|
c@92
|
227 tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul;
|
c@92
|
228 }
|
c@92
|
229 FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout);
|
c@92
|
230 for (int j = 0; j < Wid/2; ++j) {
|
c@99
|
231 int k = j+1; // include Nyquist but not DC
|
c@99
|
232 double mag = sqrt(tmprout[k] * tmprout[k] +
|
c@99
|
233 tmpiout[k] * tmpiout[k]);
|
c@99
|
234 double scaled = mag / (Wid/2);
|
c@99
|
235 // double power = scaled*scaled;
|
c@99
|
236 // if (k < Wid/2) power = power*2;
|
c@99
|
237 specs[wi][i][j] = scaled;
|
c@92
|
238 }
|
c@92
|
239 }
|
c@92
|
240 Wid /= 2;
|
c@92
|
241 ++wi;
|
c@92
|
242 }
|
c@92
|
243
|
c@99
|
244 /*
|
c@99
|
245 while (Wid >= wid) {
|
c@99
|
246 specs[wi] = new double *[WID/Wid];
|
c@99
|
247 cerr << "filling width " << Wid << endl;
|
c@99
|
248 for (int i = 0; i < WID/Wid; ++i) {
|
c@99
|
249 specs[wi][i] = new double[Wid/2];
|
c@99
|
250 for (int j = 0; j < Wid/2; ++j) {
|
c@99
|
251
|
c@99
|
252 specs[wi][i][j] = 0;
|
c@99
|
253 int x0 = i * Wid/2;
|
c@99
|
254 int x1 = (i+1) * Wid/2 - 1;
|
c@99
|
255 int y0 = j * (WID/Wid);
|
c@99
|
256 int y1 = (j+1) * (WID/Wid) - 1;
|
c@99
|
257
|
c@99
|
258 cerr << "box at " << i << "," << j << " covers [" << x0 << "," << y0 << "] to [" << x1 << "," << y1 << "]" << endl;
|
c@99
|
259
|
c@99
|
260 for (int y = WID/4; y < WID/2; ++y) {
|
c@99
|
261 for (int x = WID/4-2; x < WID/4; ++x) {
|
c@99
|
262 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
|
c@99
|
263 ++specs[wi][i][j];
|
c@99
|
264 }
|
c@99
|
265 }
|
c@99
|
266 }
|
c@99
|
267
|
c@99
|
268 for (int x = 0; x < WID/2; ++x) {
|
c@99
|
269 int y = 0;
|
c@99
|
270 if (x >= x0 && x <= x1 && y >= y0 && y <= y1) {
|
c@99
|
271 ++specs[wi][i][j];
|
c@99
|
272 }
|
c@99
|
273 }
|
c@99
|
274 }
|
c@99
|
275 }
|
c@99
|
276 cerr << "this spectrogram:" << endl;
|
c@99
|
277 for (int j = Wid/2-1; j >= 0; --j) {
|
c@99
|
278 for (int i = 0; i < WID/Wid; ++i) {
|
c@99
|
279 cerr << specs[wi][i][j] << " ";
|
c@99
|
280 }
|
c@99
|
281 cerr << endl;
|
c@99
|
282 }
|
c@99
|
283 Wid /= 2;
|
c@99
|
284 ++wi;
|
c@99
|
285 }
|
c@99
|
286 */
|
c@99
|
287
|
c@92
|
288 int *spl = new int[WID/2];
|
c@92
|
289 double *spec = new double[WID/2];
|
c@92
|
290
|
c@92
|
291 // This prefill makes it easy to see which elements are actually
|
c@92
|
292 // set by the MixSpectrogramBlock2 call. Turns out that, with
|
c@92
|
293 // 1024, 2048 and 4096 as our widths, the spl array has elements
|
c@92
|
294 // 0-4094 (incl) filled in and the spec array has elements 0-4095
|
c@92
|
295
|
c@92
|
296 for (int i = 0; i < WID/2; ++i) {
|
c@92
|
297 spl[i] = i;
|
c@92
|
298 spec[i] = i;
|
c@92
|
299 }
|
c@92
|
300
|
c@92
|
301 MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false);
|
c@92
|
302
|
c@92
|
303 Wid = WID;
|
c@92
|
304 wi = 0;
|
c@92
|
305 while (Wid >= wid) {
|
c@92
|
306 for (int i = 0; i < WID/Wid; ++i) {
|
c@92
|
307 delete[] specs[wi][i];
|
c@92
|
308 }
|
c@92
|
309 delete[] specs[wi];
|
c@92
|
310 Wid /= 2;
|
c@92
|
311 ++wi;
|
c@92
|
312 }
|
c@92
|
313 delete[] specs;
|
c@92
|
314
|
c@99
|
315 #ifdef DEBUG_VERBOSE
|
c@92
|
316 std::cerr << "Results at " << ts << ":" << std::endl;
|
c@99
|
317 for (int i = 0; i < WID/2; ++i) {
|
c@99
|
318 // if (spl[i] == i || spec[i] == i) {
|
c@99
|
319 // std::cerr << "\n***\n";
|
c@99
|
320 // }
|
c@92
|
321 std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " ";
|
c@92
|
322 }
|
c@92
|
323 std::cerr << std::endl;
|
c@99
|
324 #endif
|
c@99
|
325
|
c@92
|
326 vector<vector<float> > rmat(WID/wid);
|
c@92
|
327 for (int i = 0; i < WID/wid; ++i) {
|
c@92
|
328 rmat[i] = vector<float>(WID/2);
|
c@92
|
329 }
|
c@92
|
330
|
c@92
|
331 int y = 0, h = WID/2;
|
c@92
|
332 int x = 0, w = WID/wid;
|
c@92
|
333 unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID);
|
c@92
|
334
|
c@92
|
335 delete[] spec;
|
c@92
|
336 delete[] spl;
|
c@92
|
337
|
c@92
|
338 for (int i = 0; i < rmat.size(); ++i) {
|
c@92
|
339 Feature f;
|
c@92
|
340 f.hasTimestamp = false;
|
c@92
|
341 f.values = rmat[i];
|
c@92
|
342 fs[0].push_back(f);
|
c@92
|
343 }
|
c@92
|
344
|
c@92
|
345 /*
|
c@92
|
346 if (m_stepSize == 0) {
|
c@92
|
347 cerr << "ERROR: AdaptiveSpectrogram::process: "
|
c@92
|
348 << "AdaptiveSpectrogram has not been initialised"
|
c@92
|
349 << endl;
|
c@92
|
350 return fs;
|
c@92
|
351 }
|
c@92
|
352 */
|
c@92
|
353 return fs;
|
c@92
|
354 }
|
c@100
|
355 #endif
|
c@92
|
356
|
c@100
|
357 AdaptiveSpectrogram::FeatureSet
|
c@100
|
358 AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
|
c@100
|
359 {
|
c@100
|
360 FeatureSet fs;
|
c@100
|
361
|
c@100
|
362 int minwid = (2 << m_w), maxwid = ((2 << m_w) << m_n);
|
c@100
|
363
|
c@100
|
364 cerr << "widths from " << minwid << " to " << maxwid << " ("
|
c@100
|
365 << minwid/2 << " to " << maxwid/2 << " in real parts)" << endl;
|
c@100
|
366
|
c@100
|
367 Spectrograms s(minwid/2, maxwid/2, 1);
|
c@100
|
368
|
c@100
|
369 double *tmpin = new double[maxwid];
|
c@100
|
370 double *tmprout = new double[maxwid];
|
c@100
|
371 double *tmpiout = new double[maxwid];
|
c@100
|
372
|
c@100
|
373 int w = minwid;
|
c@100
|
374 int index = 0;
|
c@100
|
375
|
c@100
|
376 while (w <= maxwid) {
|
c@100
|
377 for (int i = 0; i < maxwid / w; ++i) {
|
c@100
|
378 int origin = maxwid/4 - w/4; // for 50% overlap
|
c@100
|
379 for (int j = 0; j < w; ++j) {
|
c@100
|
380 double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / w);
|
c@100
|
381 tmpin[j] = inputBuffers[0][origin + i * w/2 + j] * mul;
|
c@100
|
382 }
|
c@100
|
383 FFT::process(w, false, tmpin, 0, tmprout, tmpiout);
|
c@100
|
384 for (int j = 0; j < w/2; ++j) {
|
c@100
|
385 int k = j+1; // include Nyquist but not DC
|
c@100
|
386 double mag = sqrt(tmprout[k] * tmprout[k] +
|
c@100
|
387 tmpiout[k] * tmpiout[k]);
|
c@100
|
388 double scaled = mag / (w/2);
|
c@100
|
389 s.spectrograms[index]->data[i][j] = scaled;
|
c@100
|
390 }
|
c@100
|
391 }
|
c@100
|
392 w *= 2;
|
c@100
|
393 ++index;
|
c@100
|
394 }
|
c@100
|
395
|
c@100
|
396 Cutting *cutting = cut(s, maxwid/2, 0, 0, maxwid/2);
|
c@100
|
397
|
c@100
|
398 printCutting(cutting, " ");
|
c@100
|
399
|
c@100
|
400 vector<vector<float> > rmat(maxwid/minwid);
|
c@100
|
401 for (int i = 0; i < maxwid/minwid; ++i) {
|
c@100
|
402 rmat[i] = vector<float>(maxwid/2);
|
c@100
|
403 }
|
c@100
|
404
|
c@100
|
405 assemble(s, cutting, rmat, 0, 0, maxwid/minwid, maxwid/2);
|
c@100
|
406
|
c@100
|
407 delete cutting;
|
c@100
|
408
|
c@100
|
409 for (int i = 0; i < rmat.size(); ++i) {
|
c@100
|
410 Feature f;
|
c@100
|
411 f.hasTimestamp = false;
|
c@100
|
412 f.values = rmat[i];
|
c@100
|
413 fs[0].push_back(f);
|
c@100
|
414 }
|
c@100
|
415
|
c@100
|
416 return fs;
|
c@100
|
417 }
|
c@100
|
418
|
c@100
|
419 void
|
c@100
|
420 AdaptiveSpectrogram::printCutting(Cutting *c, string pfx)
|
c@100
|
421 {
|
c@100
|
422 if (c->first) {
|
c@100
|
423 if (c->cut == Cutting::Horizontal) {
|
c@100
|
424 cerr << pfx << "H" << endl;
|
c@100
|
425 } else if (c->cut == Cutting::Vertical) {
|
c@100
|
426 cerr << pfx << "V" << endl;
|
c@100
|
427 }
|
c@100
|
428 printCutting(c->first, pfx + " ");
|
c@100
|
429 printCutting(c->second, pfx + " ");
|
c@100
|
430 } else {
|
c@100
|
431 cerr << pfx << "* " << c->value << endl;
|
c@100
|
432 }
|
c@100
|
433 }
|
c@100
|
434
|
c@100
|
435 AdaptiveSpectrogram::Cutting *
|
c@100
|
436 AdaptiveSpectrogram::cut(const Spectrograms &s,
|
c@100
|
437 int res,
|
c@100
|
438 int x, int y, int h)
|
c@100
|
439 {
|
c@100
|
440 // cerr << "res = " << res << ", x = " << x << ", y = " << y << ", h = " << h << endl;
|
c@100
|
441
|
c@100
|
442 if (h > 1 && res > s.minres) {
|
c@100
|
443
|
c@100
|
444 // The "vertical" division is a top/bottom split.
|
c@100
|
445 // Splitting this way keeps us in the same resolution,
|
c@100
|
446 // but with two vertical subregions of height h/2.
|
c@100
|
447
|
c@100
|
448 Cutting *top = cut(s, res, x, y + h/2, h/2);
|
c@100
|
449 Cutting *bottom = cut(s, res, x, y, h/2);
|
c@100
|
450
|
c@100
|
451 // The "horizontal" division is a left/right split. Splitting
|
c@100
|
452 // this way places us in resolution res/2, which has lower
|
c@100
|
453 // vertical resolution but higher horizontal resolution. We
|
c@100
|
454 // need to double x accordingly.
|
c@100
|
455
|
c@100
|
456 Cutting *left = cut(s, res/2, 2 * x, y/2, h/2);
|
c@100
|
457 Cutting *right = cut(s, res/2, 2 * x + 1, y/2, h/2);
|
c@100
|
458
|
c@100
|
459 double vcost = top->cost + bottom->cost;
|
c@100
|
460 double hcost = left->cost + right->cost;
|
c@100
|
461
|
c@100
|
462 if (vcost > hcost) {
|
c@100
|
463
|
c@100
|
464 // cut horizontally (left/right)
|
c@100
|
465
|
c@100
|
466 Cutting *cutting = new Cutting;
|
c@100
|
467 cutting->cut = Cutting::Horizontal;
|
c@100
|
468 cutting->first = left;
|
c@100
|
469 cutting->second = right;
|
c@100
|
470 cutting->cost = hcost;
|
c@100
|
471 cutting->value = left->value + right->value;
|
c@100
|
472 delete top;
|
c@100
|
473 delete bottom;
|
c@100
|
474 return cutting;
|
c@100
|
475
|
c@100
|
476 } else {
|
c@100
|
477
|
c@100
|
478 Cutting *cutting = new Cutting;
|
c@100
|
479 cutting->cut = Cutting::Vertical;
|
c@100
|
480 cutting->first = top;
|
c@100
|
481 cutting->second = bottom;
|
c@100
|
482 cutting->cost = vcost;
|
c@100
|
483 cutting->value = top->value + bottom->value;
|
c@100
|
484 delete left;
|
c@100
|
485 delete right;
|
c@100
|
486 return cutting;
|
c@100
|
487 }
|
c@100
|
488
|
c@100
|
489 } else {
|
c@100
|
490
|
c@100
|
491 // no cuts possible from this level
|
c@100
|
492
|
c@100
|
493 Cutting *cutting = new Cutting;
|
c@100
|
494 cutting->cut = Cutting::Finished;
|
c@100
|
495 cutting->first = 0;
|
c@100
|
496 cutting->second = 0;
|
c@100
|
497
|
c@100
|
498 int n = 0;
|
c@100
|
499 for (int r = res; r > s.minres; r /= 2) ++n;
|
c@100
|
500 const Spectrogram *spectrogram = s.spectrograms[n];
|
c@100
|
501
|
c@100
|
502 cutting->cost = cost(*spectrogram, x, y);
|
c@100
|
503 cutting->value = value(*spectrogram, x, y);
|
c@100
|
504
|
c@100
|
505 // cerr << "cost for this cell: " << cutting->cost << endl;
|
c@100
|
506
|
c@100
|
507 return cutting;
|
c@100
|
508 }
|
c@100
|
509 }
|
c@100
|
510
|
c@100
|
511 void
|
c@100
|
512 AdaptiveSpectrogram::assemble(const Spectrograms &s,
|
c@100
|
513 const Cutting *cutting,
|
c@100
|
514 vector<vector<float> > &rmat,
|
c@100
|
515 int x, int y, int w, int h)
|
c@100
|
516 {
|
c@100
|
517 switch (cutting->cut) {
|
c@100
|
518
|
c@100
|
519 case Cutting::Finished:
|
c@100
|
520 for (int i = 0; i < w; ++i) {
|
c@100
|
521 for (int j = 0; j < h; ++j) {
|
c@100
|
522 rmat[x+i][y+j] = cutting->value;
|
c@100
|
523 }
|
c@100
|
524 }
|
c@100
|
525 return;
|
c@100
|
526
|
c@100
|
527 case Cutting::Horizontal:
|
c@100
|
528 assemble(s, cutting->first, rmat, x, y, w/2, h);
|
c@100
|
529 assemble(s, cutting->second, rmat, x+w/2, y, w/2, h);
|
c@100
|
530 break;
|
c@100
|
531
|
c@100
|
532 case Cutting::Vertical:
|
c@100
|
533 assemble(s, cutting->first, rmat, x, y+h/2, w, h/2);
|
c@100
|
534 assemble(s, cutting->second, rmat, x, y, w, h/2);
|
c@100
|
535 break;
|
c@100
|
536 }
|
c@100
|
537 }
|
c@100
|
538
|
c@92
|
539 void
|
c@92
|
540 AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat,
|
c@92
|
541 int x, int y, int w, int h,
|
c@92
|
542 int *spl,
|
c@92
|
543 double *spec, int sz,
|
c@92
|
544 int res
|
c@92
|
545 )
|
c@92
|
546 {
|
c@92
|
547
|
c@99
|
548 #ifdef DEBUG_VERBOSE
|
c@92
|
549 cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h
|
c@92
|
550 << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res << endl;
|
c@99
|
551 #endif
|
c@92
|
552
|
c@92
|
553 if (sz <= 1) {
|
c@92
|
554
|
c@92
|
555 for (int i = 0; i < w; ++i) {
|
c@92
|
556 for (int j = 0; j < h; ++j) {
|
c@92
|
557 if (rmat[x+i][y+j] != 0) {
|
c@92
|
558 cerr << "WARNING: Overwriting value " << rmat[x+i][y+j]
|
c@92
|
559 << " with " << res + i + j << " at " << x+i << "," << y+j << endl;
|
c@92
|
560 }
|
c@92
|
561 rmat[x+i][y+j] = *spec;
|
c@92
|
562 }
|
c@92
|
563 }
|
c@92
|
564
|
c@92
|
565 return;
|
c@92
|
566 }
|
c@92
|
567
|
c@92
|
568 if (*spl == 0) {
|
c@92
|
569
|
c@92
|
570 unpackResultMatrix(rmat,
|
c@92
|
571 x, y,
|
c@92
|
572 w, h/2,
|
c@92
|
573 spl + 1,
|
c@92
|
574 spec,
|
c@92
|
575 sz/2,
|
c@92
|
576 res);
|
c@92
|
577
|
c@92
|
578 unpackResultMatrix(rmat,
|
c@92
|
579 x, y + h/2,
|
c@92
|
580 w, h/2,
|
c@92
|
581 spl + sz/2,
|
c@92
|
582 spec + sz/2,
|
c@92
|
583 sz/2,
|
c@92
|
584 res);
|
c@92
|
585
|
c@92
|
586 } else if (*spl == 1) {
|
c@92
|
587
|
c@92
|
588 unpackResultMatrix(rmat,
|
c@92
|
589 x, y,
|
c@92
|
590 w/2, h,
|
c@92
|
591 spl + 1,
|
c@92
|
592 spec,
|
c@92
|
593 sz/2,
|
c@92
|
594 res/2);
|
c@92
|
595
|
c@92
|
596 unpackResultMatrix(rmat,
|
c@92
|
597 x + w/2, y,
|
c@92
|
598 w/2, h,
|
c@92
|
599 spl + sz/2,
|
c@92
|
600 spec + sz/2,
|
c@92
|
601 sz/2,
|
c@92
|
602 res/2);
|
c@92
|
603
|
c@92
|
604 } else {
|
c@92
|
605 cerr << "ERROR: *spl = " << *spl << endl;
|
c@92
|
606 }
|
c@92
|
607 }
|
c@92
|
608
|
c@92
|
609 //spl[Y-1]
|
c@92
|
610 //Specs[R0][x0:x0+x-1][Y0:Y0+Y-1]
|
c@92
|
611 //Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1]
|
c@92
|
612 //...
|
c@92
|
613 //Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1]
|
c@92
|
614 //N=WID/wid
|
c@92
|
615
|
c@92
|
616 /**
|
c@92
|
617 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl.
|
c@92
|
618 */
|
c@92
|
619 double
|
c@92
|
620 AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0,
|
c@99
|
621 int x0, int Y0, int N, double& ene, string pfx)
|
c@92
|
622 {
|
c@92
|
623 double ent = 0;
|
c@92
|
624
|
c@99
|
625 #ifdef DEBUG_VERBOSE
|
c@99
|
626 cerr << pfx << "cutting with Y = " << Y << ", R0 = " << R0 << ", x0 = " << x0 << ", Y0 = " << Y0 << ", N = " << N << endl;
|
c@99
|
627 #endif
|
c@99
|
628
|
c@92
|
629 if (Y > N) {
|
c@92
|
630
|
c@99
|
631 #ifdef DEBUG_VERBOSE
|
c@99
|
632 cerr << pfx << "Y > N case, making top/bottom cut" << endl;
|
c@99
|
633 #endif
|
c@99
|
634
|
c@92
|
635 spl[0] = 0;
|
c@92
|
636 double ene1, ene2;
|
c@92
|
637
|
c@92
|
638 ent += DoCutSpectrogramBlock2
|
c@99
|
639 (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1, pfx + " ");
|
c@92
|
640
|
c@92
|
641 ent += DoCutSpectrogramBlock2
|
c@99
|
642 (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2, pfx + " ");
|
c@92
|
643
|
c@92
|
644 ene = ene1+ene2;
|
c@92
|
645
|
c@92
|
646 } else if (N == 1) {
|
c@92
|
647
|
c@92
|
648 double tmp = Specs[R0][x0][Y0];
|
c@99
|
649
|
c@99
|
650 #ifdef DEBUG_VERBOSE
|
c@99
|
651 cerr << pfx << "N == 1 case (value here = " << tmp << ")" << endl;
|
c@99
|
652 #endif
|
c@99
|
653
|
c@92
|
654 ene = tmp;
|
c@92
|
655 ent = xlogx(tmp);
|
c@92
|
656
|
c@92
|
657 } else {
|
c@92
|
658 // Y == N, the square case
|
c@92
|
659
|
c@99
|
660 #ifdef DEBUG_VERBOSE
|
c@99
|
661 cerr << pfx << "Y == N case, testing left/right cut" << endl;
|
c@99
|
662 #endif
|
c@99
|
663
|
c@92
|
664 double enel, ener, enet, eneb, entl, entr, entt, entb;
|
c@92
|
665 int* tmpspl = new int[Y];
|
c@92
|
666
|
c@92
|
667 entl = DoCutSpectrogramBlock2
|
c@99
|
668 (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel, pfx + " ");
|
c@92
|
669
|
c@92
|
670 entr = DoCutSpectrogramBlock2
|
c@99
|
671 (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener, pfx + " ");
|
c@99
|
672
|
c@99
|
673 #ifdef DEBUG_VERBOSE
|
c@99
|
674 cerr << pfx << "Y == N case, testing top/bottom cut" << endl;
|
c@99
|
675 #endif
|
c@92
|
676
|
c@92
|
677 entb = DoCutSpectrogramBlock2
|
c@99
|
678 (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb, pfx + " ");
|
c@92
|
679
|
c@92
|
680 entt = DoCutSpectrogramBlock2
|
c@99
|
681 (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet, pfx + " ");
|
c@92
|
682
|
c@92
|
683 double
|
c@92
|
684 ene0 = enet + eneb,
|
c@92
|
685 ene1 = enel + ener,
|
c@92
|
686 ent0 = entt + entb,
|
c@92
|
687 ent1 = entl + entr;
|
c@92
|
688
|
c@92
|
689 // normalize
|
c@92
|
690
|
c@92
|
691 double eneres = 1 - (ene0+ene1)/2, norment0, norment1;
|
c@92
|
692 double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres);
|
c@92
|
693
|
c@92
|
694 // quasi-global normalization
|
c@92
|
695
|
c@99
|
696 // norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres);
|
c@99
|
697 // norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres);
|
c@99
|
698 norment0 = ene0;
|
c@99
|
699 norment1 = ene1;
|
c@92
|
700
|
c@92
|
701 // local normalization
|
c@92
|
702
|
c@92
|
703 if (norment1 < norment0) {
|
c@99
|
704 #ifdef DEBUG_VERBOSE
|
c@99
|
705 cerr << pfx << "top/bottom cut wins (" << norment0 << " > " << norment1 << "), returning sum ent " << ent0 << " and ene " << ene0 << endl;
|
c@99
|
706 #endif
|
c@92
|
707 spl[0] = 0;
|
c@92
|
708 ent = ent0, ene = ene0;
|
c@92
|
709 memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2));
|
c@92
|
710 } else {
|
c@99
|
711 #ifdef DEBUG_VERBOSE
|
c@99
|
712 cerr << pfx << "left/right cut wins (" << norment1 << " >= " << norment0 << "), returning sum ent " << ent1 << " and ene " << ene1 << endl;
|
c@99
|
713 #endif
|
c@92
|
714 spl[0] = 1;
|
c@92
|
715 ent = ent1, ene = ene1;
|
c@92
|
716 }
|
c@92
|
717 }
|
c@92
|
718 return ent;
|
c@92
|
719 }
|
c@92
|
720
|
c@92
|
721 /**
|
c@92
|
722 * DoMixSpectrogramBlock2 collects values from the multiple
|
c@92
|
723 * spectrograms Specs into a linear array Spec.
|
c@92
|
724 */
|
c@92
|
725 double
|
c@92
|
726 AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y,
|
c@92
|
727 int R0, int x0, int Y0, bool normmix, int res,
|
c@92
|
728 double* e)
|
c@92
|
729 {
|
c@92
|
730 if (Y == 1) {
|
c@92
|
731
|
c@92
|
732 Spec[0] = Specs[R0][x0][Y0]*e[0];
|
c@92
|
733
|
c@92
|
734 } else {
|
c@92
|
735
|
c@92
|
736 double le[32];
|
c@92
|
737
|
c@92
|
738 if (normmix && Y < (1<<res)) {
|
c@92
|
739
|
c@92
|
740 for (int i = 0, j = 1, k = Y;
|
c@92
|
741 i < res;
|
c@92
|
742 i++, j *= 2, k /= 2) {
|
c@92
|
743
|
c@92
|
744 double lle = 0;
|
c@92
|
745
|
c@92
|
746 for (int fr = 0; fr < j; fr++) {
|
c@92
|
747 for (int n = 0; n < k; n++) {
|
c@92
|
748 lle +=
|
c@92
|
749 Specs[i+R0][x0+fr][Y0+n] *
|
c@92
|
750 Specs[i+R0][x0+fr][Y0+n];
|
c@92
|
751 }
|
c@92
|
752 }
|
c@92
|
753
|
c@92
|
754 lle = sqrt(lle)*e[i];
|
c@92
|
755
|
c@92
|
756 if (i == 0) {
|
c@92
|
757 le[0] = lle;
|
c@92
|
758 } else if (lle > le[0]*2) {
|
c@92
|
759 le[i] = e[i]*le[0]*2/lle;
|
c@92
|
760 } else {
|
c@92
|
761 le[i] = e[i];
|
c@92
|
762 }
|
c@92
|
763 }
|
c@92
|
764
|
c@92
|
765 le[0] = e[0];
|
c@92
|
766
|
c@92
|
767 } else {
|
c@92
|
768
|
c@92
|
769 memcpy(le, e, sizeof(double)*res);
|
c@92
|
770 }
|
c@92
|
771
|
c@92
|
772 if (spl[0] == 0) {
|
c@92
|
773
|
c@92
|
774 int newres;
|
c@92
|
775 if (Y >= (1<<res)) newres = res;
|
c@92
|
776 else newres = res-1;
|
c@92
|
777
|
c@92
|
778 DoMixSpectrogramBlock2
|
c@92
|
779 (&spl[1], Spec, Specs, Y/2, R0, x0, Y0,
|
c@92
|
780 normmix, newres, le);
|
c@92
|
781
|
c@92
|
782 DoMixSpectrogramBlock2
|
c@92
|
783 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2,
|
c@92
|
784 normmix, newres, le);
|
c@92
|
785
|
c@92
|
786 } else {
|
c@92
|
787
|
c@92
|
788 DoMixSpectrogramBlock2
|
c@92
|
789 (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2,
|
c@92
|
790 normmix, res-1, &le[1]);
|
c@92
|
791
|
c@92
|
792 DoMixSpectrogramBlock2
|
c@92
|
793 (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2,
|
c@92
|
794 normmix, res-1, &le[1]);
|
c@92
|
795 }
|
c@92
|
796 }
|
c@92
|
797
|
c@92
|
798 return 0;
|
c@92
|
799 }
|
c@92
|
800
|
c@92
|
801 /**
|
c@92
|
802 * MixSpectrogramBlock2 calls the two Do...() to do the real work.
|
c@92
|
803 *
|
c@92
|
804 * At this point:
|
c@92
|
805 * spl is... what? the returned "cutting", organised how?
|
c@92
|
806 * Spec is... what? the returned spectrogram, organised how?
|
c@92
|
807 * Specs is an array of input spectrograms
|
c@92
|
808 * WID is the maximum window size
|
c@92
|
809 * wid is the minimum window size
|
c@92
|
810 * normmix is... what?
|
c@92
|
811 */
|
c@92
|
812 double
|
c@92
|
813 AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int
|
c@92
|
814 WID, int wid, bool normmix)
|
c@92
|
815 {
|
c@92
|
816 double ene[32];
|
c@92
|
817
|
c@92
|
818 // find the total energy and normalize
|
c@92
|
819
|
c@92
|
820 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
|
c@92
|
821
|
c@92
|
822 double lene = 0;
|
c@92
|
823
|
c@92
|
824 for (int fr = 0; fr < Fr; fr++) {
|
c@92
|
825 for (int k = 0; k < Wid; k++) {
|
c@92
|
826 lene += Specs[i][fr][k]*Specs[i][fr][k];
|
c@92
|
827 }
|
c@92
|
828 }
|
c@92
|
829
|
c@92
|
830 ene[i] = lene;
|
c@92
|
831
|
c@92
|
832 if (lene != 0) {
|
c@92
|
833 double ilene = 1.0/lene;
|
c@92
|
834 for (int fr = 0; fr < Fr; fr++) {
|
c@92
|
835 for (int k = 0; k < Wid; k++) {
|
c@92
|
836 Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene;
|
c@92
|
837 }
|
c@92
|
838 }
|
c@92
|
839 }
|
c@92
|
840 }
|
c@92
|
841
|
c@92
|
842
|
c@92
|
843 double result = DoCutSpectrogramBlock2
|
c@92
|
844 (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]);
|
c@92
|
845
|
c@92
|
846 // de-normalize
|
c@92
|
847
|
c@92
|
848 for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
|
c@92
|
849 double lene = ene[i];
|
c@92
|
850 if (lene != 0) {
|
c@92
|
851 for (int fr = 0; fr < Fr; fr++) {
|
c@92
|
852 for (int k = 0; k < Wid; k++) {
|
c@92
|
853 Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene);
|
c@92
|
854 }
|
c@92
|
855 }
|
c@92
|
856 }
|
c@92
|
857 }
|
c@92
|
858
|
c@92
|
859 double e[32];
|
c@92
|
860 for (int i = 0; i < 32; i++) e[i] = 1;
|
c@92
|
861
|
c@92
|
862 DoMixSpectrogramBlock2
|
c@92
|
863 (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e);
|
c@92
|
864
|
c@92
|
865 return result;
|
c@92
|
866 }
|
c@92
|
867
|
c@92
|
868 /**
|
c@92
|
869 * MixSpectrogram2 does the work for Fr frames (the largest frame),
|
c@92
|
870 * which basically calls MixSpectrogramBlock2 Fr times.
|
c@92
|
871 *
|
c@92
|
872 * the 3-D array Specs is the multiple spectrograms calculated with
|
c@92
|
873 * window sizes between wid and WID, Specs[0] is the 0th spectrogram,
|
c@92
|
874 * etc.
|
c@92
|
875 *
|
c@92
|
876 * spl and Spec for all frames are returned by MixSpectrogram2, each
|
c@92
|
877 * as a 2-D array.
|
c@92
|
878 */
|
c@92
|
879 double
|
c@92
|
880 AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr,
|
c@92
|
881 int WID, int wid, bool norm, bool normmix)
|
c@92
|
882 {
|
c@92
|
883 // totally Fr frames of WID samples
|
c@92
|
884 // each frame is divided into wid VERTICAL parts
|
c@92
|
885
|
c@92
|
886 int Res = log2(WID/wid)+1;
|
c@92
|
887 double*** lSpecs = new double**[Res];
|
c@92
|
888
|
c@92
|
889 for (int i = 0; i < Fr; i++) {
|
c@92
|
890
|
c@92
|
891 for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) {
|
c@92
|
892 lSpecs[j] = &Specs[j][i*nfr];
|
c@92
|
893 }
|
c@92
|
894
|
c@92
|
895 MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm);
|
c@92
|
896 }
|
c@92
|
897
|
c@92
|
898 delete[] lSpecs;
|
c@92
|
899 return 0;
|
c@92
|
900 }
|
c@92
|
901
|