annotate Matcher.cpp @ 15:a82276091bbd

Pull out Matcher parameters into an object
author Chris Cannam
date Fri, 10 Oct 2014 12:55:05 +0100
parents cdead4a52755
children 4c8526c5bf58
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #include "Matcher.h"
cannam@0 18
cannam@0 19 #include <iostream>
cannam@0 20
cannam@4 21 #include <cstdlib>
cannam@4 22
// Class-wide flag: while true, the diagnostics in this file that are guarded
// by `if (!silent)` are not written to cerr.
cannam@0 23 bool Matcher::silent = true;
cannam@0 24
Chris@10 25 //#define DEBUG_MATCHER 1
Chris@10 26
Chris@15 27 Matcher::Matcher(Parameters parameters, Matcher *p) :
Chris@15 28 params(parameters)
cannam@0 29 {
Chris@10 30 #ifdef DEBUG_MATCHER
Chris@15 31 cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl;
Chris@10 32 #endif
cannam@0 33
cannam@0 34 otherMatcher = p; // the first matcher will need this to be set later
cannam@0 35 firstPM = (!p);
cannam@0 36 ltAverage = 0;
cannam@0 37 frameCount = 0;
cannam@0 38 runCount = 0;
cannam@0 39 blockSize = 0;
cannam@0 40 scale = 90;
cannam@0 41
Chris@15 42 blockSize = lrint(params.blockTime / params.hopTime);
Chris@15 43 #ifdef DEBUG_MATCHER
Chris@15 44 cerr << "Matcher: blockSize = " << blockSize << endl;
Chris@15 45 #endif
cannam@0 46
cannam@0 47 distance = 0;
cannam@0 48 bestPathCost = 0;
cannam@0 49 distYSizes = 0;
cannam@0 50 distXSize = 0;
cannam@0 51
cannam@0 52 initialised = false;
cannam@0 53
cannam@0 54 } // default constructor
cannam@0 55
cannam@0 56 Matcher::~Matcher()
cannam@0 57 {
Chris@10 58 #ifdef DEBUG_MATCHER
Chris@15 59 cerr << "Matcher(" << this << ")::~Matcher()" << endl;
Chris@10 60 #endif
cannam@0 61
cannam@0 62 if (initialised) {
cannam@0 63
cannam@0 64 for (int i = 0; i < distXSize; ++i) {
cannam@0 65 if (distance[i]) {
cannam@0 66 free(distance[i]);
cannam@0 67 free(bestPathCost[i]);
cannam@0 68 }
cannam@0 69 }
cannam@0 70 free(distance);
cannam@0 71 free(bestPathCost);
cannam@0 72
cannam@0 73 free(first);
cannam@0 74 free(last);
cannam@0 75
cannam@0 76 free(distYSizes);
cannam@0 77 }
cannam@0 78 }
cannam@0 79
cannam@0 80 void
cannam@0 81 Matcher::init()
cannam@0 82 {
cannam@0 83 if (initialised) return;
cannam@0 84
cannam@0 85 initialised = true;
cannam@0 86
Chris@15 87 makeFreqMap();
cannam@0 88
cannam@0 89 initVector<double>(prevFrame, freqMapSize);
cannam@0 90 initVector<double>(newFrame, freqMapSize);
Chris@13 91 initMatrix<double>(frames, blockSize, freqMapSize);
Chris@13 92 initVector<double>(totalEnergies, blockSize);
cannam@0 93
Chris@15 94 int distSize = (params.maxRunCount + 1) * blockSize;
cannam@0 95
cannam@0 96 distXSize = blockSize * 2;
cannam@0 97
Chris@15 98 std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl;
cannam@0 99
cannam@0 100 distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *));
cannam@0 101 bestPathCost = (int **)malloc(distXSize * sizeof(int *));
cannam@0 102 distYSizes = (int *)malloc(distXSize * sizeof(int));
cannam@0 103
cannam@0 104 for (int i = 0; i < blockSize; ++i) {
cannam@0 105 distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char));
cannam@0 106 bestPathCost[i] = (int *)malloc(distSize * sizeof(int));
cannam@0 107 distYSizes[i] = distSize;
cannam@0 108 }
cannam@0 109 for (int i = blockSize; i < distXSize; ++i) {
cannam@0 110 distance[i] = 0;
cannam@0 111 }
cannam@0 112
cannam@0 113 first = (int *)malloc(distXSize * sizeof(int));
cannam@0 114 last = (int *)malloc(distXSize * sizeof(int));
cannam@0 115
cannam@0 116 frameCount = 0;
cannam@0 117 runCount = 0;
cannam@0 118 ltAverage = 0;
cannam@0 119
cannam@0 120 } // init
cannam@0 121
cannam@0 122 void
Chris@15 123 Matcher::makeFreqMap()
cannam@0 124 {
Chris@15 125 initVector<int>(freqMap, params.fftSize/2 + 1);
Chris@15 126 if (params.useChromaFrequencyMap) {
Chris@15 127 #ifdef DEBUG_MATCHER
Chris@15 128 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@15 129 #endif
Chris@15 130 makeChromaFrequencyMap();
Chris@15 131 } else {
Chris@15 132 #ifdef DEBUG_MATCHER
Chris@15 133 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@15 134 #endif
Chris@15 135 makeStandardFrequencyMap();
Chris@15 136 }
cannam@0 137 } // makeFreqMap()
cannam@0 138
cannam@0 139 void
Chris@15 140 Matcher::makeStandardFrequencyMap()
cannam@0 141 {
Chris@15 142 double binWidth = params.sampleRate / params.fftSize;
cannam@0 143 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
cannam@7 144 int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/
cannam@7 145 log(2.0) * 12 + 69);
cannam@0 146 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
cannam@0 147 int i = 0;
cannam@0 148 while (i <= crossoverBin) {
cannam@0 149 freqMap[i] = i;
cannam@0 150 ++i;
cannam@0 151 }
Chris@15 152 while (i <= params.fftSize/2) {
cannam@7 153 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
cannam@0 154 if (midi > 127)
cannam@0 155 midi = 127;
cannam@0 156 freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi;
cannam@0 157 }
cannam@0 158 freqMapSize = freqMap[i-1] + 1;
cannam@0 159 if (!silent) {
cannam@0 160 cerr << "Standard map size: " << freqMapSize
cannam@0 161 << "; Crossover at: " << crossoverBin << endl;
Chris@15 162 for (i = 0; i < params.fftSize / 2; i++)
Chris@15 163 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
cannam@0 164 }
cannam@0 165 } // makeStandardFrequencyMap()
cannam@0 166
cannam@0 167 void
Chris@15 168 Matcher::makeChromaFrequencyMap()
cannam@0 169 {
Chris@15 170 double binWidth = params.sampleRate / params.fftSize;
cannam@0 171 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
cannam@0 172 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
cannam@0 173 int i = 0;
cannam@0 174 while (i <= crossoverBin)
cannam@0 175 freqMap[i++] = 0;
Chris@15 176 while (i <= params.fftSize/2) {
cannam@7 177 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
cannam@0 178 freqMap[i++] = (lrint(midi)) % 12 + 1;
cannam@0 179 }
cannam@0 180 freqMapSize = 13;
cannam@0 181 if (!silent) {
cannam@0 182 cerr << "Chroma map size: " << freqMapSize
cannam@0 183 << "; Crossover at: " << crossoverBin << endl;
Chris@15 184 for (i = 0; i < params.fftSize / 2; i++)
cannam@0 185 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
cannam@0 186 }
cannam@0 187 } // makeChromaFrequencyMap()
cannam@0 188
Chris@14 189 vector<double>
cannam@0 190 Matcher::processFrame(double *reBuffer, double *imBuffer)
cannam@0 191 {
cannam@0 192 if (!initialised) init();
cannam@0 193
cannam@0 194 for (int i = 0; i < (int)newFrame.size(); ++i) {
cannam@0 195 newFrame[i] = 0;
cannam@0 196 }
cannam@0 197 double rms = 0;
Chris@15 198 for (int i = 0; i <= params.fftSize/2; i++) {
cannam@0 199 double mag = reBuffer[i] * reBuffer[i] +
cannam@0 200 imBuffer[i] * imBuffer[i];
cannam@0 201 rms += mag;
cannam@0 202 newFrame[freqMap[i]] += mag;
cannam@0 203 }
Chris@15 204 rms = sqrt(rms / (params.fftSize/2));
cannam@0 205
cannam@0 206 int frameIndex = frameCount % blockSize;
cannam@0 207
cannam@0 208 if (frameCount >= distXSize) {
cannam@0 209 // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl;
cannam@0 210 distXSize *= 2;
cannam@0 211 distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *));
cannam@0 212 bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *));
cannam@0 213 distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int));
cannam@0 214 first = (int *)realloc(first, distXSize * sizeof(int));
cannam@0 215 last = (int *)realloc(last, distXSize * sizeof(int));
cannam@0 216
cannam@0 217 for (int i = distXSize/2; i < distXSize; ++i) {
cannam@0 218 distance[i] = 0;
cannam@0 219 }
cannam@0 220 }
cannam@0 221
cannam@0 222 if (firstPM && (frameCount >= blockSize)) {
cannam@0 223
cannam@0 224 int len = last[frameCount - blockSize] -
cannam@0 225 first[frameCount - blockSize];
cannam@0 226
cannam@0 227 // We need to copy distance[frameCount-blockSize] to
cannam@0 228 // distance[frameCount], and then truncate
cannam@0 229 // distance[frameCount-blockSize] to its first len elements.
cannam@0 230 // Same for bestPathCost.
cannam@0 231 /*
cannam@4 232 std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to "
cannam@0 233 << frameCount << ", allocating " << len << " for "
cannam@0 234 << frameCount - blockSize << std::endl;
cannam@0 235 */
cannam@0 236 distance[frameCount] = distance[frameCount - blockSize];
cannam@0 237
cannam@0 238 distance[frameCount - blockSize] = (unsigned char *)
cannam@0 239 malloc(len * sizeof(unsigned char));
cannam@0 240 for (int i = 0; i < len; ++i) {
cannam@0 241 distance[frameCount - blockSize][i] =
cannam@0 242 distance[frameCount][i];
cannam@0 243 }
cannam@0 244
cannam@0 245 bestPathCost[frameCount] = bestPathCost[frameCount - blockSize];
cannam@0 246
cannam@0 247 bestPathCost[frameCount - blockSize] = (int *)
cannam@0 248 malloc(len * sizeof(int));
cannam@0 249 for (int i = 0; i < len; ++i) {
cannam@0 250 bestPathCost[frameCount - blockSize][i] =
cannam@0 251 bestPathCost[frameCount][i];
cannam@0 252 }
cannam@0 253
cannam@0 254 distYSizes[frameCount] = distYSizes[frameCount - blockSize];
cannam@0 255 distYSizes[frameCount - blockSize] = len;
cannam@0 256 }
cannam@0 257
cannam@0 258 double totalEnergy = 0;
Chris@15 259 if (params.useSpectralDifference) {
cannam@0 260 for (int i = 0; i < freqMapSize; i++) {
cannam@0 261 totalEnergy += newFrame[i];
cannam@0 262 if (newFrame[i] > prevFrame[i]) {
cannam@0 263 frames[frameIndex][i] = newFrame[i] - prevFrame[i];
cannam@0 264 } else {
cannam@0 265 frames[frameIndex][i] = 0;
cannam@0 266 }
cannam@0 267 }
cannam@0 268 } else {
cannam@0 269 for (int i = 0; i < freqMapSize; i++) {
cannam@0 270 frames[frameIndex][i] = newFrame[i];
cannam@0 271 totalEnergy += frames[frameIndex][i];
cannam@0 272 }
cannam@0 273 }
Chris@13 274 totalEnergies[frameIndex] = totalEnergy;
cannam@0 275
cannam@0 276 double decay = frameCount >= 200 ? 0.99:
cannam@0 277 (frameCount < 100? 0: (frameCount - 100) / 100.0);
cannam@0 278
cannam@0 279 if (ltAverage == 0)
cannam@0 280 ltAverage = totalEnergy;
cannam@0 281 else
cannam@0 282 ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay);
cannam@0 283
Chris@15 284 if (rms <= params.silenceThreshold)
cannam@0 285 for (int i = 0; i < freqMapSize; i++)
cannam@0 286 frames[frameIndex][i] = 0;
Chris@15 287 else if (params.frameNorm == NormaliseFrameToSum1)
cannam@0 288 for (int i = 0; i < freqMapSize; i++)
cannam@0 289 frames[frameIndex][i] /= totalEnergy;
Chris@15 290 else if (params.frameNorm == NormaliseFrameToLTAverage)
cannam@0 291 for (int i = 0; i < freqMapSize; i++)
cannam@0 292 frames[frameIndex][i] /= ltAverage;
cannam@0 293
Chris@14 294 vector<double> processedFrame = frames[frameIndex];
Chris@14 295
cannam@0 296 int stop = otherMatcher->frameCount;
cannam@0 297 int index = stop - blockSize;
cannam@0 298 if (index < 0)
cannam@0 299 index = 0;
cannam@0 300 first[frameCount] = index;
cannam@0 301 last[frameCount] = stop;
cannam@0 302
cannam@0 303 bool overflow = false;
cannam@0 304 int mn= -1;
cannam@0 305 int mx= -1;
cannam@0 306 for ( ; index < stop; index++) {
cannam@0 307 int dMN = calcDistance(frames[frameIndex],
cannam@0 308 otherMatcher->frames[index % blockSize]);
cannam@0 309 if (mx<0)
cannam@0 310 mx = mn = dMN;
cannam@0 311 else if (dMN > mx)
cannam@0 312 mx = dMN;
cannam@0 313 else if (dMN < mn)
cannam@0 314 mn = dMN;
cannam@0 315 if (dMN >= 255) {
cannam@0 316 overflow = true;
cannam@0 317 dMN = 255;
cannam@0 318 }
cannam@0 319 if ((frameCount == 0) && (index == 0)) // first element
cannam@0 320 setValue(0, 0, 0, 0, dMN);
cannam@0 321 else if (frameCount == 0) // first row
cannam@0 322 setValue(0, index, ADVANCE_OTHER,
cannam@0 323 getValue(0, index-1, true), dMN);
cannam@0 324 else if (index == 0) // first column
cannam@0 325 setValue(frameCount, index, ADVANCE_THIS,
cannam@0 326 getValue(frameCount - 1, 0, true), dMN);
cannam@0 327 else if (index == otherMatcher->frameCount - blockSize) {
cannam@0 328 // missing value(s) due to cutoff
cannam@0 329 // - no previous value in current row (resp. column)
cannam@0 330 // - no diagonal value if prev. dir. == curr. dirn
cannam@0 331 int min2 = getValue(frameCount - 1, index, true);
cannam@0 332 // if ((firstPM && (first[frameCount - 1] == index)) ||
cannam@0 333 // (!firstPM && (last[index-1] < frameCount)))
cannam@0 334 if (first[frameCount - 1] == index)
cannam@0 335 setValue(frameCount, index, ADVANCE_THIS, min2, dMN);
cannam@0 336 else {
cannam@0 337 int min1 = getValue(frameCount - 1, index - 1, true);
cannam@0 338 if (min1 + dMN <= min2)
cannam@0 339 setValue(frameCount, index, ADVANCE_BOTH, min1,dMN);
cannam@0 340 else
cannam@0 341 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
cannam@0 342 }
cannam@0 343 } else {
cannam@0 344 int min1 = getValue(frameCount, index-1, true);
cannam@0 345 int min2 = getValue(frameCount - 1, index, true);
cannam@0 346 int min3 = getValue(frameCount - 1, index-1, true);
cannam@0 347 if (min1 <= min2) {
cannam@0 348 if (min3 + dMN <= min1)
cannam@0 349 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
cannam@0 350 else
cannam@0 351 setValue(frameCount, index, ADVANCE_OTHER,min1,dMN);
cannam@0 352 } else {
cannam@0 353 if (min3 + dMN <= min2)
cannam@0 354 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
cannam@0 355 else
cannam@0 356 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
cannam@0 357 }
cannam@0 358 }
cannam@0 359 otherMatcher->last[index]++;
cannam@0 360 } // loop for row (resp. column)
cannam@0 361
cannam@0 362 vector<double> tmp = prevFrame;
cannam@0 363 prevFrame = newFrame;
cannam@0 364 newFrame = tmp;
cannam@0 365
cannam@0 366 frameCount++;
cannam@0 367 runCount++;
cannam@0 368
cannam@0 369 otherMatcher->runCount = 0;
cannam@0 370
cannam@0 371 if (overflow && !silent)
cannam@0 372 cerr << "WARNING: overflow in distance metric: "
cannam@0 373 << "frame " << frameCount << ", val = " << mx << endl;
cannam@0 374
cannam@0 375 if (!silent)
cannam@0 376 std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl;
cannam@0 377
cannam@0 378 if ((frameCount % 100) == 0) {
cannam@0 379 if (!silent) {
cannam@0 380 cerr << "Progress:" << frameCount << " " << ltAverage << endl;
cannam@0 381 }
cannam@0 382 }
Chris@14 383
Chris@14 384 return processedFrame;
cannam@0 385 } // processFrame()
cannam@0 386
cannam@0 387 int
cannam@0 388 Matcher::calcDistance(const vector<double> &f1, const vector<double> &f2)
cannam@0 389 {
cannam@0 390 double d = 0;
cannam@0 391 double sum = 0;
cannam@0 392 for (int i = 0; i < freqMapSize; i++) {
cannam@0 393 d += fabs(f1[i] - f2[i]);
cannam@0 394 sum += f1[i] + f2[i];
cannam@0 395 }
cannam@0 396 // System.err.print(" " + Format.d(d,3));
cannam@0 397 if (sum == 0)
cannam@0 398 return 0;
Chris@15 399 if (params.distanceNorm == NormaliseDistanceToSum)
cannam@0 400 return (int)(scale * d / sum); // 0 <= d/sum <= 2
Chris@15 401 if (params.distanceNorm != NormaliseDistanceToLogSum)
cannam@0 402 return (int)(scale * d);
Chris@13 403
Chris@13 404 // note if this were to be restored, it would have to use
Chris@13 405 // totalEnergies vector instead of f1[freqMapSize] which used to
Chris@13 406 // store the total energy:
cannam@0 407 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
Chris@13 408
cannam@0 409 double weight = (8 + log(sum)) / 10.0;
cannam@0 410 // if (weight < mins) {
cannam@0 411 // mins = weight;
cannam@0 412 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
cannam@0 413 // }
cannam@0 414 // if (weight > maxs) {
cannam@0 415 // maxs = weight;
cannam@0 416 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
cannam@0 417 // }
cannam@0 418 if (weight < 0)
cannam@0 419 weight = 0;
cannam@0 420 else if (weight > 1)
cannam@0 421 weight = 1;
cannam@0 422 return (int)(scale * d / sum * weight);
cannam@0 423 } // calcDistance()
cannam@0 424
cannam@0 425 int
cannam@0 426 Matcher::getValue(int i, int j, bool firstAttempt)
cannam@0 427 {
cannam@0 428 if (firstPM)
cannam@0 429 return bestPathCost[i][j - first[i]];
cannam@0 430 else
cannam@0 431 return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]];
cannam@0 432 } // getValue()
cannam@0 433
cannam@0 434 void
cannam@0 435 Matcher::setValue(int i, int j, int dir, int value, int dMN)
cannam@0 436 {
cannam@0 437 if (firstPM) {
cannam@0 438 distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir);
cannam@0 439 bestPathCost[i][j - first[i]] =
cannam@0 440 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
cannam@0 441 } else {
cannam@0 442 if (dir == ADVANCE_THIS)
cannam@0 443 dir = ADVANCE_OTHER;
cannam@0 444 else if (dir == ADVANCE_OTHER)
cannam@0 445 dir = ADVANCE_THIS;
cannam@0 446 int idx = i - otherMatcher->first[j];
cannam@0 447 if (idx == (int)otherMatcher->distYSizes[j]) {
cannam@0 448 // This should never happen, but if we allow arbitrary
cannam@0 449 // pauses in either direction, and arbitrary lengths at
cannam@0 450 // end, it is better than a segmentation fault.
cannam@0 451 std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl;
cannam@0 452 otherMatcher->distYSizes[j] = idx * 2;
cannam@0 453 otherMatcher->bestPathCost[j] =
cannam@0 454 (int *)realloc(otherMatcher->bestPathCost[j],
cannam@0 455 idx * 2 * sizeof(int));
cannam@0 456 otherMatcher->distance[j] =
cannam@0 457 (unsigned char *)realloc(otherMatcher->distance[j],
cannam@0 458 idx * 2 * sizeof(unsigned char));
cannam@0 459 }
cannam@0 460 otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir);
cannam@0 461 otherMatcher->bestPathCost[j][idx] =
cannam@0 462 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
cannam@0 463 }
cannam@0 464 } // setValue()
cannam@0 465