Mercurial > hg > match-vamp
comparison Matcher.cpp @ 0:640f92242cc1
* initial import
author | cannam |
---|---|
date | Wed, 24 Oct 2007 12:13:43 +0000 |
parents | |
children | de792b8c2801 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:640f92242cc1 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Vamp feature extraction plugin using the MATCH audio alignment | |
5 algorithm. | |
6 | |
7 Centre for Digital Music, Queen Mary, University of London. | |
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. | |
9 | |
10 This program is free software; you can redistribute it and/or | |
11 modify it under the terms of the GNU General Public License as | |
12 published by the Free Software Foundation; either version 2 of the | |
13 License, or (at your option) any later version. See the file | |
14 COPYING included with this distribution for more information. | |
15 */ | |
16 | |
17 #include "Matcher.h" | |
18 #include "Finder.h" | |
19 | |
20 #include <iostream> | |
21 | |
22 bool Matcher::silent = true; | |
23 | |
24 Matcher::Matcher(float rate, Matcher *p) | |
25 { | |
26 std::cerr << "Matcher::Matcher(" << rate << ", " << p << ")" << std::endl; | |
27 | |
28 sampleRate = rate; | |
29 otherMatcher = p; // the first matcher will need this to be set later | |
30 firstPM = (!p); | |
31 matchFileOffset = 0; | |
32 ltAverage = 0; | |
33 frameCount = 0; | |
34 runCount = 0; | |
35 paused = false; | |
36 hopSize = 0; | |
37 fftSize = 0; | |
38 blockSize = 0; | |
39 hopTime = 0.020; // DEFAULT, overridden with -h //!!! | |
40 fftTime = 0.04644; // DEFAULT, overridden with -f | |
41 blockTime = 10.0; // DEFAULT, overridden with -c | |
42 normalise1 = true; | |
43 normalise2 = false; | |
44 normalise3 = false; | |
45 normalise4 = true; | |
46 useSpectralDifference = true; | |
47 useChromaFrequencyMap = false; | |
48 scale = 90; | |
49 maxFrames = 0; // stop at EOF | |
50 | |
51 hopSize = lrint(sampleRate * hopTime); | |
52 fftSize = lrint(pow(2, lrint(log(fftTime * sampleRate) / log(2)))); | |
53 blockSize = lrint(blockTime / hopTime); | |
54 | |
55 distance = 0; | |
56 bestPathCost = 0; | |
57 distYSizes = 0; | |
58 distXSize = 0; | |
59 | |
60 initialised = false; | |
61 | |
62 } // default constructor | |
63 | |
64 Matcher::~Matcher() | |
65 { | |
66 std::cerr << "Matcher(" << this << ")::~Matcher()" << std::endl; | |
67 | |
68 if (initialised) { | |
69 | |
70 for (int i = 0; i < distXSize; ++i) { | |
71 if (distance[i]) { | |
72 free(distance[i]); | |
73 free(bestPathCost[i]); | |
74 } | |
75 } | |
76 free(distance); | |
77 free(bestPathCost); | |
78 | |
79 free(first); | |
80 free(last); | |
81 | |
82 free(distYSizes); | |
83 } | |
84 } | |
85 | |
86 void | |
87 Matcher::print() | |
88 { | |
89 cerr << toString() << endl; | |
90 } // print() | |
91 | |
92 string | |
93 Matcher::toString() | |
94 { | |
95 std::stringstream os; | |
96 os << "Matcher " << this << ": (" << sampleRate | |
97 << "kHz)" | |
98 << "\n\tHop size: " << hopSize | |
99 << "\n\tFFT size: " << fftSize | |
100 << "\n\tBlock size: " << blockSize; | |
101 return os.str(); | |
102 } // toString() | |
103 | |
104 void | |
105 Matcher::init() | |
106 { | |
107 if (initialised) return; | |
108 | |
109 initialised = true; | |
110 | |
111 makeFreqMap(fftSize, sampleRate); | |
112 | |
113 initVector<double>(prevFrame, freqMapSize); | |
114 initVector<double>(newFrame, freqMapSize); | |
115 initMatrix<double>(frames, blockSize, freqMapSize + 1); | |
116 | |
117 int distSize = (MAX_RUN_COUNT + 1) * blockSize; | |
118 | |
119 distXSize = blockSize * 2; | |
120 | |
121 // std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl; | |
122 | |
123 distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); | |
124 bestPathCost = (int **)malloc(distXSize * sizeof(int *)); | |
125 distYSizes = (int *)malloc(distXSize * sizeof(int)); | |
126 | |
127 for (int i = 0; i < blockSize; ++i) { | |
128 distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); | |
129 bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); | |
130 distYSizes[i] = distSize; | |
131 } | |
132 for (int i = blockSize; i < distXSize; ++i) { | |
133 distance[i] = 0; | |
134 } | |
135 | |
136 first = (int *)malloc(distXSize * sizeof(int)); | |
137 last = (int *)malloc(distXSize * sizeof(int)); | |
138 | |
139 frameCount = 0; | |
140 runCount = 0; | |
141 // frameRMS = 0; | |
142 ltAverage = 0; | |
143 | |
144 if (!silent) print(); | |
145 } // init | |
146 | |
147 void | |
148 Matcher::makeFreqMap(int fftSize, float sampleRate) | |
149 { | |
150 initVector<int>(freqMap, fftSize/2 + 1); | |
151 if (useChromaFrequencyMap) | |
152 makeChromaFrequencyMap(fftSize, sampleRate); | |
153 else | |
154 makeStandardFrequencyMap(fftSize, sampleRate); | |
155 } // makeFreqMap() | |
156 | |
157 void | |
158 Matcher::makeStandardFrequencyMap(int fftSize, float sampleRate) | |
159 { | |
160 double binWidth = sampleRate / fftSize; | |
161 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); | |
162 int crossoverMidi = lrint(log(crossoverBin*binWidth/440)/ | |
163 log(2) * 12 + 69); | |
164 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; | |
165 int i = 0; | |
166 while (i <= crossoverBin) { | |
167 freqMap[i] = i; | |
168 ++i; | |
169 } | |
170 while (i <= fftSize/2) { | |
171 double midi = log(i*binWidth/440) / log(2) * 12 + 69; | |
172 if (midi > 127) | |
173 midi = 127; | |
174 freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; | |
175 } | |
176 freqMapSize = freqMap[i-1] + 1; | |
177 if (!silent) { | |
178 cerr << "Standard map size: " << freqMapSize | |
179 << "; Crossover at: " << crossoverBin << endl; | |
180 //!!! for (i = 0; i < fftSize / 2; i++) | |
181 // cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; | |
182 } | |
183 } // makeStandardFrequencyMap() | |
184 | |
185 void | |
186 Matcher::makeChromaFrequencyMap(int fftSize, float sampleRate) | |
187 { | |
188 double binWidth = sampleRate / fftSize; | |
189 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); | |
190 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; | |
191 int i = 0; | |
192 while (i <= crossoverBin) | |
193 freqMap[i++] = 0; | |
194 while (i <= fftSize/2) { | |
195 double midi = log(i*binWidth/440) / log(2) * 12 + 69; | |
196 freqMap[i++] = (lrint(midi)) % 12 + 1; | |
197 } | |
198 freqMapSize = 13; | |
199 if (!silent) { | |
200 cerr << "Chroma map size: " << freqMapSize | |
201 << "; Crossover at: " << crossoverBin << endl; | |
202 for (i = 0; i < fftSize / 2; i++) | |
203 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; | |
204 } | |
205 } // makeChromaFrequencyMap() | |
206 | |
207 void | |
208 Matcher::processFrame(double *reBuffer, double *imBuffer) | |
209 { | |
210 if (!initialised) init(); | |
211 | |
212 for (int i = 0; i < (int)newFrame.size(); ++i) { | |
213 newFrame[i] = 0; | |
214 } | |
215 double rms = 0; | |
216 for (int i = 0; i <= fftSize/2; i++) { | |
217 double mag = reBuffer[i] * reBuffer[i] + | |
218 imBuffer[i] * imBuffer[i]; | |
219 rms += mag; | |
220 newFrame[freqMap[i]] += mag; | |
221 } | |
222 rms = sqrt(rms / (fftSize/2)); | |
223 | |
224 int frameIndex = frameCount % blockSize; | |
225 | |
226 if (frameCount >= distXSize) { | |
227 // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; | |
228 distXSize *= 2; | |
229 distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); | |
230 bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); | |
231 distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); | |
232 first = (int *)realloc(first, distXSize * sizeof(int)); | |
233 last = (int *)realloc(last, distXSize * sizeof(int)); | |
234 | |
235 for (int i = distXSize/2; i < distXSize; ++i) { | |
236 distance[i] = 0; | |
237 } | |
238 } | |
239 | |
240 if (firstPM && (frameCount >= blockSize)) { | |
241 | |
242 int len = last[frameCount - blockSize] - | |
243 first[frameCount - blockSize]; | |
244 | |
245 // We need to copy distance[frameCount-blockSize] to | |
246 // distance[frameCount], and then truncate | |
247 // distance[frameCount-blockSize] to its first len elements. | |
248 // Same for bestPathCost. | |
249 /* | |
250 std::cerr << "moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to " | |
251 << frameCount << ", allocating " << len << " for " | |
252 << frameCount - blockSize << std::endl; | |
253 */ | |
254 distance[frameCount] = distance[frameCount - blockSize]; | |
255 | |
256 distance[frameCount - blockSize] = (unsigned char *) | |
257 malloc(len * sizeof(unsigned char)); | |
258 for (int i = 0; i < len; ++i) { | |
259 distance[frameCount - blockSize][i] = | |
260 distance[frameCount][i]; | |
261 } | |
262 | |
263 bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; | |
264 | |
265 bestPathCost[frameCount - blockSize] = (int *) | |
266 malloc(len * sizeof(int)); | |
267 for (int i = 0; i < len; ++i) { | |
268 bestPathCost[frameCount - blockSize][i] = | |
269 bestPathCost[frameCount][i]; | |
270 } | |
271 | |
272 distYSizes[frameCount] = distYSizes[frameCount - blockSize]; | |
273 distYSizes[frameCount - blockSize] = len; | |
274 } | |
275 | |
276 double totalEnergy = 0; | |
277 if (useSpectralDifference) { | |
278 for (int i = 0; i < freqMapSize; i++) { | |
279 totalEnergy += newFrame[i]; | |
280 if (newFrame[i] > prevFrame[i]) { | |
281 frames[frameIndex][i] = newFrame[i] - prevFrame[i]; | |
282 } else { | |
283 frames[frameIndex][i] = 0; | |
284 } | |
285 } | |
286 } else { | |
287 for (int i = 0; i < freqMapSize; i++) { | |
288 frames[frameIndex][i] = newFrame[i]; | |
289 totalEnergy += frames[frameIndex][i]; | |
290 } | |
291 } | |
292 frames[frameIndex][freqMapSize] = totalEnergy; | |
293 | |
294 double decay = frameCount >= 200 ? 0.99: | |
295 (frameCount < 100? 0: (frameCount - 100) / 100.0); | |
296 | |
297 if (ltAverage == 0) | |
298 ltAverage = totalEnergy; | |
299 else | |
300 ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); | |
301 | |
302 // System.err.println(Format.d(ltAverage,4) + " " + | |
303 // Format.d(totalEnergy) + " " + | |
304 // Format.d(frameRMS)); | |
305 | |
306 // std::cerr << "ltAverage: " << ltAverage << ", totalEnergy: " << totalEnergy << ", frameRMS: " << rms << std::endl; | |
307 | |
308 if (rms <= 0.01) //!!! silenceThreshold) | |
309 for (int i = 0; i < freqMapSize; i++) | |
310 frames[frameIndex][i] = 0; | |
311 else if (normalise1) | |
312 for (int i = 0; i < freqMapSize; i++) | |
313 frames[frameIndex][i] /= totalEnergy; | |
314 else if (normalise3) | |
315 for (int i = 0; i < freqMapSize; i++) | |
316 frames[frameIndex][i] /= ltAverage; | |
317 | |
318 int stop = otherMatcher->frameCount; | |
319 int index = stop - blockSize; | |
320 if (index < 0) | |
321 index = 0; | |
322 first[frameCount] = index; | |
323 last[frameCount] = stop; | |
324 | |
325 bool overflow = false; | |
326 int mn= -1; | |
327 int mx= -1; | |
328 for ( ; index < stop; index++) { | |
329 int dMN = calcDistance(frames[frameIndex], | |
330 otherMatcher->frames[index % blockSize]); | |
331 if (mx<0) | |
332 mx = mn = dMN; | |
333 else if (dMN > mx) | |
334 mx = dMN; | |
335 else if (dMN < mn) | |
336 mn = dMN; | |
337 if (dMN >= 255) { | |
338 overflow = true; | |
339 dMN = 255; | |
340 } | |
341 if ((frameCount == 0) && (index == 0)) // first element | |
342 setValue(0, 0, 0, 0, dMN); | |
343 else if (frameCount == 0) // first row | |
344 setValue(0, index, ADVANCE_OTHER, | |
345 getValue(0, index-1, true), dMN); | |
346 else if (index == 0) // first column | |
347 setValue(frameCount, index, ADVANCE_THIS, | |
348 getValue(frameCount - 1, 0, true), dMN); | |
349 else if (index == otherMatcher->frameCount - blockSize) { | |
350 // missing value(s) due to cutoff | |
351 // - no previous value in current row (resp. column) | |
352 // - no diagonal value if prev. dir. == curr. dirn | |
353 int min2 = getValue(frameCount - 1, index, true); | |
354 // if ((firstPM && (first[frameCount - 1] == index)) || | |
355 // (!firstPM && (last[index-1] < frameCount))) | |
356 if (first[frameCount - 1] == index) | |
357 setValue(frameCount, index, ADVANCE_THIS, min2, dMN); | |
358 else { | |
359 int min1 = getValue(frameCount - 1, index - 1, true); | |
360 if (min1 + dMN <= min2) | |
361 setValue(frameCount, index, ADVANCE_BOTH, min1,dMN); | |
362 else | |
363 setValue(frameCount, index, ADVANCE_THIS, min2,dMN); | |
364 } | |
365 } else { | |
366 int min1 = getValue(frameCount, index-1, true); | |
367 int min2 = getValue(frameCount - 1, index, true); | |
368 int min3 = getValue(frameCount - 1, index-1, true); | |
369 if (min1 <= min2) { | |
370 if (min3 + dMN <= min1) | |
371 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); | |
372 else | |
373 setValue(frameCount, index, ADVANCE_OTHER,min1,dMN); | |
374 } else { | |
375 if (min3 + dMN <= min2) | |
376 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); | |
377 else | |
378 setValue(frameCount, index, ADVANCE_THIS, min2,dMN); | |
379 } | |
380 } | |
381 otherMatcher->last[index]++; | |
382 } // loop for row (resp. column) | |
383 | |
384 vector<double> tmp = prevFrame; | |
385 prevFrame = newFrame; | |
386 newFrame = tmp; | |
387 | |
388 frameCount++; | |
389 runCount++; | |
390 | |
391 otherMatcher->runCount = 0; | |
392 | |
393 if (overflow && !silent) | |
394 cerr << "WARNING: overflow in distance metric: " | |
395 << "frame " << frameCount << ", val = " << mx << endl; | |
396 | |
397 if (!silent) | |
398 std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; | |
399 | |
400 if ((frameCount % 100) == 0) { | |
401 if (!silent) { | |
402 cerr << "Progress:" << frameCount << " " << ltAverage << endl; | |
403 // Profile.report(); | |
404 } | |
405 } | |
406 //!!! if (frameCount == maxFrames) | |
407 // closeStreams(); | |
408 // } | |
409 } // processFrame() | |
410 | |
411 int | |
412 Matcher::calcDistance(const vector<double> &f1, const vector<double> &f2) | |
413 { | |
414 double d = 0; | |
415 double sum = 0; | |
416 for (int i = 0; i < freqMapSize; i++) { | |
417 d += fabs(f1[i] - f2[i]); | |
418 sum += f1[i] + f2[i]; | |
419 } | |
420 // System.err.print(" " + Format.d(d,3)); | |
421 if (sum == 0) | |
422 return 0; | |
423 if (normalise2) | |
424 return (int)(scale * d / sum); // 0 <= d/sum <= 2 | |
425 if (!normalise4) | |
426 return (int)(scale * d); | |
427 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0; | |
428 double weight = (8 + log(sum)) / 10.0; | |
429 // if (weight < mins) { | |
430 // mins = weight; | |
431 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs)); | |
432 // } | |
433 // if (weight > maxs) { | |
434 // maxs = weight; | |
435 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs)); | |
436 // } | |
437 if (weight < 0) | |
438 weight = 0; | |
439 else if (weight > 1) | |
440 weight = 1; | |
441 return (int)(scale * d / sum * weight); | |
442 } // calcDistance() | |
443 | |
444 int | |
445 Matcher::getValue(int i, int j, bool firstAttempt) | |
446 { | |
447 if (firstPM) | |
448 return bestPathCost[i][j - first[i]]; | |
449 else | |
450 return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]]; | |
451 } // getValue() | |
452 | |
453 void | |
454 Matcher::setValue(int i, int j, int dir, int value, int dMN) | |
455 { | |
456 if (firstPM) { | |
457 distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir); | |
458 bestPathCost[i][j - first[i]] = | |
459 (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); | |
460 } else { | |
461 if (dir == ADVANCE_THIS) | |
462 dir = ADVANCE_OTHER; | |
463 else if (dir == ADVANCE_OTHER) | |
464 dir = ADVANCE_THIS; | |
465 int idx = i - otherMatcher->first[j]; | |
466 if (idx == (int)otherMatcher->distYSizes[j]) { | |
467 // This should never happen, but if we allow arbitrary | |
468 // pauses in either direction, and arbitrary lengths at | |
469 // end, it is better than a segmentation fault. | |
470 std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; | |
471 otherMatcher->distYSizes[j] = idx * 2; | |
472 otherMatcher->bestPathCost[j] = | |
473 (int *)realloc(otherMatcher->bestPathCost[j], | |
474 idx * 2 * sizeof(int)); | |
475 otherMatcher->distance[j] = | |
476 (unsigned char *)realloc(otherMatcher->distance[j], | |
477 idx * 2 * sizeof(unsigned char)); | |
478 } | |
479 otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); | |
480 otherMatcher->bestPathCost[j][idx] = | |
481 (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); | |
482 } | |
483 } // setValue() | |
484 |