comparison Matcher.cpp @ 0:640f92242cc1

* initial import
author cannam
date Wed, 24 Oct 2007 12:13:43 +0000
parents
children de792b8c2801
comparison
equal deleted inserted replaced
-1:000000000000 0:640f92242cc1
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 Vamp feature extraction plugin using the MATCH audio alignment
5 algorithm.
6
7 Centre for Digital Music, Queen Mary, University of London.
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
9
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2 of the
13 License, or (at your option) any later version. See the file
14 COPYING included with this distribution for more information.
15 */
16
17 #include "Matcher.h"
18 #include "Finder.h"
19
20 #include <iostream>
21
22 bool Matcher::silent = true;
23
24 Matcher::Matcher(float rate, Matcher *p)
25 {
26 std::cerr << "Matcher::Matcher(" << rate << ", " << p << ")" << std::endl;
27
28 sampleRate = rate;
29 otherMatcher = p; // the first matcher will need this to be set later
30 firstPM = (!p);
31 matchFileOffset = 0;
32 ltAverage = 0;
33 frameCount = 0;
34 runCount = 0;
35 paused = false;
36 hopSize = 0;
37 fftSize = 0;
38 blockSize = 0;
39 hopTime = 0.020; // DEFAULT, overridden with -h //!!!
40 fftTime = 0.04644; // DEFAULT, overridden with -f
41 blockTime = 10.0; // DEFAULT, overridden with -c
42 normalise1 = true;
43 normalise2 = false;
44 normalise3 = false;
45 normalise4 = true;
46 useSpectralDifference = true;
47 useChromaFrequencyMap = false;
48 scale = 90;
49 maxFrames = 0; // stop at EOF
50
51 hopSize = lrint(sampleRate * hopTime);
52 fftSize = lrint(pow(2, lrint(log(fftTime * sampleRate) / log(2))));
53 blockSize = lrint(blockTime / hopTime);
54
55 distance = 0;
56 bestPathCost = 0;
57 distYSizes = 0;
58 distXSize = 0;
59
60 initialised = false;
61
62 } // default constructor
63
64 Matcher::~Matcher()
65 {
66 std::cerr << "Matcher(" << this << ")::~Matcher()" << std::endl;
67
68 if (initialised) {
69
70 for (int i = 0; i < distXSize; ++i) {
71 if (distance[i]) {
72 free(distance[i]);
73 free(bestPathCost[i]);
74 }
75 }
76 free(distance);
77 free(bestPathCost);
78
79 free(first);
80 free(last);
81
82 free(distYSizes);
83 }
84 }
85
86 void
87 Matcher::print()
88 {
89 cerr << toString() << endl;
90 } // print()
91
92 string
93 Matcher::toString()
94 {
95 std::stringstream os;
96 os << "Matcher " << this << ": (" << sampleRate
97 << "kHz)"
98 << "\n\tHop size: " << hopSize
99 << "\n\tFFT size: " << fftSize
100 << "\n\tBlock size: " << blockSize;
101 return os.str();
102 } // toString()
103
104 void
105 Matcher::init()
106 {
107 if (initialised) return;
108
109 initialised = true;
110
111 makeFreqMap(fftSize, sampleRate);
112
113 initVector<double>(prevFrame, freqMapSize);
114 initVector<double>(newFrame, freqMapSize);
115 initMatrix<double>(frames, blockSize, freqMapSize + 1);
116
117 int distSize = (MAX_RUN_COUNT + 1) * blockSize;
118
119 distXSize = blockSize * 2;
120
121 // std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl;
122
123 distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *));
124 bestPathCost = (int **)malloc(distXSize * sizeof(int *));
125 distYSizes = (int *)malloc(distXSize * sizeof(int));
126
127 for (int i = 0; i < blockSize; ++i) {
128 distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char));
129 bestPathCost[i] = (int *)malloc(distSize * sizeof(int));
130 distYSizes[i] = distSize;
131 }
132 for (int i = blockSize; i < distXSize; ++i) {
133 distance[i] = 0;
134 }
135
136 first = (int *)malloc(distXSize * sizeof(int));
137 last = (int *)malloc(distXSize * sizeof(int));
138
139 frameCount = 0;
140 runCount = 0;
141 // frameRMS = 0;
142 ltAverage = 0;
143
144 if (!silent) print();
145 } // init
146
147 void
148 Matcher::makeFreqMap(int fftSize, float sampleRate)
149 {
150 initVector<int>(freqMap, fftSize/2 + 1);
151 if (useChromaFrequencyMap)
152 makeChromaFrequencyMap(fftSize, sampleRate);
153 else
154 makeStandardFrequencyMap(fftSize, sampleRate);
155 } // makeFreqMap()
156
157 void
158 Matcher::makeStandardFrequencyMap(int fftSize, float sampleRate)
159 {
160 double binWidth = sampleRate / fftSize;
161 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
162 int crossoverMidi = lrint(log(crossoverBin*binWidth/440)/
163 log(2) * 12 + 69);
164 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
165 int i = 0;
166 while (i <= crossoverBin) {
167 freqMap[i] = i;
168 ++i;
169 }
170 while (i <= fftSize/2) {
171 double midi = log(i*binWidth/440) / log(2) * 12 + 69;
172 if (midi > 127)
173 midi = 127;
174 freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi;
175 }
176 freqMapSize = freqMap[i-1] + 1;
177 if (!silent) {
178 cerr << "Standard map size: " << freqMapSize
179 << "; Crossover at: " << crossoverBin << endl;
180 //!!! for (i = 0; i < fftSize / 2; i++)
181 // cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
182 }
183 } // makeStandardFrequencyMap()
184
185 void
186 Matcher::makeChromaFrequencyMap(int fftSize, float sampleRate)
187 {
188 double binWidth = sampleRate / fftSize;
189 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
190 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
191 int i = 0;
192 while (i <= crossoverBin)
193 freqMap[i++] = 0;
194 while (i <= fftSize/2) {
195 double midi = log(i*binWidth/440) / log(2) * 12 + 69;
196 freqMap[i++] = (lrint(midi)) % 12 + 1;
197 }
198 freqMapSize = 13;
199 if (!silent) {
200 cerr << "Chroma map size: " << freqMapSize
201 << "; Crossover at: " << crossoverBin << endl;
202 for (i = 0; i < fftSize / 2; i++)
203 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
204 }
205 } // makeChromaFrequencyMap()
206
207 void
208 Matcher::processFrame(double *reBuffer, double *imBuffer)
209 {
210 if (!initialised) init();
211
212 for (int i = 0; i < (int)newFrame.size(); ++i) {
213 newFrame[i] = 0;
214 }
215 double rms = 0;
216 for (int i = 0; i <= fftSize/2; i++) {
217 double mag = reBuffer[i] * reBuffer[i] +
218 imBuffer[i] * imBuffer[i];
219 rms += mag;
220 newFrame[freqMap[i]] += mag;
221 }
222 rms = sqrt(rms / (fftSize/2));
223
224 int frameIndex = frameCount % blockSize;
225
226 if (frameCount >= distXSize) {
227 // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl;
228 distXSize *= 2;
229 distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *));
230 bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *));
231 distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int));
232 first = (int *)realloc(first, distXSize * sizeof(int));
233 last = (int *)realloc(last, distXSize * sizeof(int));
234
235 for (int i = distXSize/2; i < distXSize; ++i) {
236 distance[i] = 0;
237 }
238 }
239
240 if (firstPM && (frameCount >= blockSize)) {
241
242 int len = last[frameCount - blockSize] -
243 first[frameCount - blockSize];
244
245 // We need to copy distance[frameCount-blockSize] to
246 // distance[frameCount], and then truncate
247 // distance[frameCount-blockSize] to its first len elements.
248 // Same for bestPathCost.
249 /*
250 std::cerr << "moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to "
251 << frameCount << ", allocating " << len << " for "
252 << frameCount - blockSize << std::endl;
253 */
254 distance[frameCount] = distance[frameCount - blockSize];
255
256 distance[frameCount - blockSize] = (unsigned char *)
257 malloc(len * sizeof(unsigned char));
258 for (int i = 0; i < len; ++i) {
259 distance[frameCount - blockSize][i] =
260 distance[frameCount][i];
261 }
262
263 bestPathCost[frameCount] = bestPathCost[frameCount - blockSize];
264
265 bestPathCost[frameCount - blockSize] = (int *)
266 malloc(len * sizeof(int));
267 for (int i = 0; i < len; ++i) {
268 bestPathCost[frameCount - blockSize][i] =
269 bestPathCost[frameCount][i];
270 }
271
272 distYSizes[frameCount] = distYSizes[frameCount - blockSize];
273 distYSizes[frameCount - blockSize] = len;
274 }
275
276 double totalEnergy = 0;
277 if (useSpectralDifference) {
278 for (int i = 0; i < freqMapSize; i++) {
279 totalEnergy += newFrame[i];
280 if (newFrame[i] > prevFrame[i]) {
281 frames[frameIndex][i] = newFrame[i] - prevFrame[i];
282 } else {
283 frames[frameIndex][i] = 0;
284 }
285 }
286 } else {
287 for (int i = 0; i < freqMapSize; i++) {
288 frames[frameIndex][i] = newFrame[i];
289 totalEnergy += frames[frameIndex][i];
290 }
291 }
292 frames[frameIndex][freqMapSize] = totalEnergy;
293
294 double decay = frameCount >= 200 ? 0.99:
295 (frameCount < 100? 0: (frameCount - 100) / 100.0);
296
297 if (ltAverage == 0)
298 ltAverage = totalEnergy;
299 else
300 ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay);
301
302 // System.err.println(Format.d(ltAverage,4) + " " +
303 // Format.d(totalEnergy) + " " +
304 // Format.d(frameRMS));
305
306 // std::cerr << "ltAverage: " << ltAverage << ", totalEnergy: " << totalEnergy << ", frameRMS: " << rms << std::endl;
307
308 if (rms <= 0.01) //!!! silenceThreshold)
309 for (int i = 0; i < freqMapSize; i++)
310 frames[frameIndex][i] = 0;
311 else if (normalise1)
312 for (int i = 0; i < freqMapSize; i++)
313 frames[frameIndex][i] /= totalEnergy;
314 else if (normalise3)
315 for (int i = 0; i < freqMapSize; i++)
316 frames[frameIndex][i] /= ltAverage;
317
318 int stop = otherMatcher->frameCount;
319 int index = stop - blockSize;
320 if (index < 0)
321 index = 0;
322 first[frameCount] = index;
323 last[frameCount] = stop;
324
325 bool overflow = false;
326 int mn= -1;
327 int mx= -1;
328 for ( ; index < stop; index++) {
329 int dMN = calcDistance(frames[frameIndex],
330 otherMatcher->frames[index % blockSize]);
331 if (mx<0)
332 mx = mn = dMN;
333 else if (dMN > mx)
334 mx = dMN;
335 else if (dMN < mn)
336 mn = dMN;
337 if (dMN >= 255) {
338 overflow = true;
339 dMN = 255;
340 }
341 if ((frameCount == 0) && (index == 0)) // first element
342 setValue(0, 0, 0, 0, dMN);
343 else if (frameCount == 0) // first row
344 setValue(0, index, ADVANCE_OTHER,
345 getValue(0, index-1, true), dMN);
346 else if (index == 0) // first column
347 setValue(frameCount, index, ADVANCE_THIS,
348 getValue(frameCount - 1, 0, true), dMN);
349 else if (index == otherMatcher->frameCount - blockSize) {
350 // missing value(s) due to cutoff
351 // - no previous value in current row (resp. column)
352 // - no diagonal value if prev. dir. == curr. dirn
353 int min2 = getValue(frameCount - 1, index, true);
354 // if ((firstPM && (first[frameCount - 1] == index)) ||
355 // (!firstPM && (last[index-1] < frameCount)))
356 if (first[frameCount - 1] == index)
357 setValue(frameCount, index, ADVANCE_THIS, min2, dMN);
358 else {
359 int min1 = getValue(frameCount - 1, index - 1, true);
360 if (min1 + dMN <= min2)
361 setValue(frameCount, index, ADVANCE_BOTH, min1,dMN);
362 else
363 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
364 }
365 } else {
366 int min1 = getValue(frameCount, index-1, true);
367 int min2 = getValue(frameCount - 1, index, true);
368 int min3 = getValue(frameCount - 1, index-1, true);
369 if (min1 <= min2) {
370 if (min3 + dMN <= min1)
371 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
372 else
373 setValue(frameCount, index, ADVANCE_OTHER,min1,dMN);
374 } else {
375 if (min3 + dMN <= min2)
376 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
377 else
378 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
379 }
380 }
381 otherMatcher->last[index]++;
382 } // loop for row (resp. column)
383
384 vector<double> tmp = prevFrame;
385 prevFrame = newFrame;
386 newFrame = tmp;
387
388 frameCount++;
389 runCount++;
390
391 otherMatcher->runCount = 0;
392
393 if (overflow && !silent)
394 cerr << "WARNING: overflow in distance metric: "
395 << "frame " << frameCount << ", val = " << mx << endl;
396
397 if (!silent)
398 std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl;
399
400 if ((frameCount % 100) == 0) {
401 if (!silent) {
402 cerr << "Progress:" << frameCount << " " << ltAverage << endl;
403 // Profile.report();
404 }
405 }
406 //!!! if (frameCount == maxFrames)
407 // closeStreams();
408 // }
409 } // processFrame()
410
411 int
412 Matcher::calcDistance(const vector<double> &f1, const vector<double> &f2)
413 {
414 double d = 0;
415 double sum = 0;
416 for (int i = 0; i < freqMapSize; i++) {
417 d += fabs(f1[i] - f2[i]);
418 sum += f1[i] + f2[i];
419 }
420 // System.err.print(" " + Format.d(d,3));
421 if (sum == 0)
422 return 0;
423 if (normalise2)
424 return (int)(scale * d / sum); // 0 <= d/sum <= 2
425 if (!normalise4)
426 return (int)(scale * d);
427 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
428 double weight = (8 + log(sum)) / 10.0;
429 // if (weight < mins) {
430 // mins = weight;
431 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
432 // }
433 // if (weight > maxs) {
434 // maxs = weight;
435 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
436 // }
437 if (weight < 0)
438 weight = 0;
439 else if (weight > 1)
440 weight = 1;
441 return (int)(scale * d / sum * weight);
442 } // calcDistance()
443
444 int
445 Matcher::getValue(int i, int j, bool firstAttempt)
446 {
447 if (firstPM)
448 return bestPathCost[i][j - first[i]];
449 else
450 return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]];
451 } // getValue()
452
453 void
454 Matcher::setValue(int i, int j, int dir, int value, int dMN)
455 {
456 if (firstPM) {
457 distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir);
458 bestPathCost[i][j - first[i]] =
459 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
460 } else {
461 if (dir == ADVANCE_THIS)
462 dir = ADVANCE_OTHER;
463 else if (dir == ADVANCE_OTHER)
464 dir = ADVANCE_THIS;
465 int idx = i - otherMatcher->first[j];
466 if (idx == (int)otherMatcher->distYSizes[j]) {
467 // This should never happen, but if we allow arbitrary
468 // pauses in either direction, and arbitrary lengths at
469 // end, it is better than a segmentation fault.
470 std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl;
471 otherMatcher->distYSizes[j] = idx * 2;
472 otherMatcher->bestPathCost[j] =
473 (int *)realloc(otherMatcher->bestPathCost[j],
474 idx * 2 * sizeof(int));
475 otherMatcher->distance[j] =
476 (unsigned char *)realloc(otherMatcher->distance[j],
477 idx * 2 * sizeof(unsigned char));
478 }
479 otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir);
480 otherMatcher->bestPathCost[j][idx] =
481 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
482 }
483 } // setValue()
484