annotate Matcher.cpp @ 16:4c8526c5bf58

Implement features outputs
author Chris Cannam
date Fri, 10 Oct 2014 13:16:54 +0100
parents a82276091bbd
children 47f98349aa17
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #include "Matcher.h"
cannam@0 18
cannam@0 19 #include <iostream>
cannam@0 20
cannam@4 21 #include <cstdlib>
Chris@16 22 #include <cassert>
cannam@4 23
cannam@0 24 bool Matcher::silent = true;
cannam@0 25
Chris@10 26 //#define DEBUG_MATCHER 1
Chris@10 27
Chris@15 28 Matcher::Matcher(Parameters parameters, Matcher *p) :
Chris@15 29 params(parameters)
cannam@0 30 {
Chris@10 31 #ifdef DEBUG_MATCHER
Chris@15 32 cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl;
Chris@10 33 #endif
cannam@0 34
cannam@0 35 otherMatcher = p; // the first matcher will need this to be set later
cannam@0 36 firstPM = (!p);
cannam@0 37 ltAverage = 0;
cannam@0 38 frameCount = 0;
cannam@0 39 runCount = 0;
cannam@0 40 blockSize = 0;
cannam@0 41 scale = 90;
cannam@0 42
Chris@15 43 blockSize = lrint(params.blockTime / params.hopTime);
Chris@15 44 #ifdef DEBUG_MATCHER
Chris@15 45 cerr << "Matcher: blockSize = " << blockSize << endl;
Chris@15 46 #endif
cannam@0 47
cannam@0 48 distance = 0;
cannam@0 49 bestPathCost = 0;
cannam@0 50 distYSizes = 0;
cannam@0 51 distXSize = 0;
cannam@0 52
cannam@0 53 initialised = false;
cannam@0 54
cannam@0 55 } // default constructor
cannam@0 56
cannam@0 57 Matcher::~Matcher()
cannam@0 58 {
Chris@10 59 #ifdef DEBUG_MATCHER
Chris@15 60 cerr << "Matcher(" << this << ")::~Matcher()" << endl;
Chris@10 61 #endif
cannam@0 62
cannam@0 63 if (initialised) {
cannam@0 64
cannam@0 65 for (int i = 0; i < distXSize; ++i) {
cannam@0 66 if (distance[i]) {
cannam@0 67 free(distance[i]);
cannam@0 68 free(bestPathCost[i]);
cannam@0 69 }
cannam@0 70 }
cannam@0 71 free(distance);
cannam@0 72 free(bestPathCost);
cannam@0 73
cannam@0 74 free(first);
cannam@0 75 free(last);
cannam@0 76
cannam@0 77 free(distYSizes);
cannam@0 78 }
cannam@0 79 }
cannam@0 80
cannam@0 81 void
cannam@0 82 Matcher::init()
cannam@0 83 {
cannam@0 84 if (initialised) return;
cannam@0 85
cannam@0 86 initialised = true;
cannam@0 87
Chris@16 88 freqMapSize = getFeatureSize(params);
Chris@16 89
Chris@15 90 makeFreqMap();
cannam@0 91
cannam@0 92 initVector<double>(prevFrame, freqMapSize);
cannam@0 93 initVector<double>(newFrame, freqMapSize);
Chris@13 94 initMatrix<double>(frames, blockSize, freqMapSize);
Chris@13 95 initVector<double>(totalEnergies, blockSize);
cannam@0 96
Chris@15 97 int distSize = (params.maxRunCount + 1) * blockSize;
cannam@0 98
cannam@0 99 distXSize = blockSize * 2;
cannam@0 100
Chris@15 101 std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl;
cannam@0 102
cannam@0 103 distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *));
cannam@0 104 bestPathCost = (int **)malloc(distXSize * sizeof(int *));
cannam@0 105 distYSizes = (int *)malloc(distXSize * sizeof(int));
cannam@0 106
cannam@0 107 for (int i = 0; i < blockSize; ++i) {
cannam@0 108 distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char));
cannam@0 109 bestPathCost[i] = (int *)malloc(distSize * sizeof(int));
cannam@0 110 distYSizes[i] = distSize;
cannam@0 111 }
cannam@0 112 for (int i = blockSize; i < distXSize; ++i) {
cannam@0 113 distance[i] = 0;
cannam@0 114 }
cannam@0 115
cannam@0 116 first = (int *)malloc(distXSize * sizeof(int));
cannam@0 117 last = (int *)malloc(distXSize * sizeof(int));
cannam@0 118
cannam@0 119 frameCount = 0;
cannam@0 120 runCount = 0;
cannam@0 121 ltAverage = 0;
cannam@0 122
cannam@0 123 } // init
cannam@0 124
cannam@0 125 void
Chris@15 126 Matcher::makeFreqMap()
cannam@0 127 {
Chris@15 128 initVector<int>(freqMap, params.fftSize/2 + 1);
Chris@15 129 if (params.useChromaFrequencyMap) {
Chris@15 130 #ifdef DEBUG_MATCHER
Chris@15 131 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@15 132 #endif
Chris@15 133 makeChromaFrequencyMap();
Chris@15 134 } else {
Chris@15 135 #ifdef DEBUG_MATCHER
Chris@15 136 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@15 137 #endif
Chris@15 138 makeStandardFrequencyMap();
Chris@15 139 }
cannam@0 140 } // makeFreqMap()
cannam@0 141
Chris@16 142 int
Chris@16 143 Matcher::getFeatureSize(Parameters params)
Chris@16 144 {
Chris@16 145 if (params.useChromaFrequencyMap) {
Chris@16 146 return 13;
Chris@16 147 } else {
Chris@16 148 return 84;
Chris@16 149 }
Chris@16 150 }
Chris@16 151
cannam@0 152 void
Chris@15 153 Matcher::makeStandardFrequencyMap()
cannam@0 154 {
Chris@15 155 double binWidth = params.sampleRate / params.fftSize;
cannam@0 156 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
cannam@7 157 int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/
cannam@7 158 log(2.0) * 12 + 69);
cannam@0 159 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
cannam@0 160 int i = 0;
cannam@0 161 while (i <= crossoverBin) {
cannam@0 162 freqMap[i] = i;
cannam@0 163 ++i;
cannam@0 164 }
Chris@15 165 while (i <= params.fftSize/2) {
cannam@7 166 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
Chris@16 167 if (midi > 127) midi = 127;
cannam@0 168 freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi;
cannam@0 169 }
Chris@16 170 assert(freqMapSize == freqMap[i-1] + 1);
cannam@0 171 if (!silent) {
cannam@0 172 cerr << "Standard map size: " << freqMapSize
cannam@0 173 << "; Crossover at: " << crossoverBin << endl;
Chris@15 174 for (i = 0; i < params.fftSize / 2; i++)
Chris@15 175 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
cannam@0 176 }
cannam@0 177 } // makeStandardFrequencyMap()
cannam@0 178
cannam@0 179 void
Chris@15 180 Matcher::makeChromaFrequencyMap()
cannam@0 181 {
Chris@15 182 double binWidth = params.sampleRate / params.fftSize;
cannam@0 183 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
cannam@0 184 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
cannam@0 185 int i = 0;
cannam@0 186 while (i <= crossoverBin)
cannam@0 187 freqMap[i++] = 0;
Chris@15 188 while (i <= params.fftSize/2) {
cannam@7 189 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
cannam@0 190 freqMap[i++] = (lrint(midi)) % 12 + 1;
cannam@0 191 }
cannam@0 192 if (!silent) {
cannam@0 193 cerr << "Chroma map size: " << freqMapSize
cannam@0 194 << "; Crossover at: " << crossoverBin << endl;
Chris@15 195 for (i = 0; i < params.fftSize / 2; i++)
cannam@0 196 cerr << "freqMap[" << i << "] = " << freqMap[i] << endl;
cannam@0 197 }
cannam@0 198 } // makeChromaFrequencyMap()
cannam@0 199
Chris@14 200 vector<double>
cannam@0 201 Matcher::processFrame(double *reBuffer, double *imBuffer)
cannam@0 202 {
cannam@0 203 if (!initialised) init();
cannam@0 204
cannam@0 205 for (int i = 0; i < (int)newFrame.size(); ++i) {
cannam@0 206 newFrame[i] = 0;
cannam@0 207 }
cannam@0 208 double rms = 0;
Chris@15 209 for (int i = 0; i <= params.fftSize/2; i++) {
cannam@0 210 double mag = reBuffer[i] * reBuffer[i] +
cannam@0 211 imBuffer[i] * imBuffer[i];
cannam@0 212 rms += mag;
cannam@0 213 newFrame[freqMap[i]] += mag;
cannam@0 214 }
Chris@15 215 rms = sqrt(rms / (params.fftSize/2));
cannam@0 216
cannam@0 217 int frameIndex = frameCount % blockSize;
cannam@0 218
cannam@0 219 if (frameCount >= distXSize) {
cannam@0 220 // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl;
cannam@0 221 distXSize *= 2;
cannam@0 222 distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *));
cannam@0 223 bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *));
cannam@0 224 distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int));
cannam@0 225 first = (int *)realloc(first, distXSize * sizeof(int));
cannam@0 226 last = (int *)realloc(last, distXSize * sizeof(int));
cannam@0 227
cannam@0 228 for (int i = distXSize/2; i < distXSize; ++i) {
cannam@0 229 distance[i] = 0;
cannam@0 230 }
cannam@0 231 }
cannam@0 232
cannam@0 233 if (firstPM && (frameCount >= blockSize)) {
cannam@0 234
cannam@0 235 int len = last[frameCount - blockSize] -
cannam@0 236 first[frameCount - blockSize];
cannam@0 237
cannam@0 238 // We need to copy distance[frameCount-blockSize] to
cannam@0 239 // distance[frameCount], and then truncate
cannam@0 240 // distance[frameCount-blockSize] to its first len elements.
cannam@0 241 // Same for bestPathCost.
cannam@0 242 /*
cannam@4 243 std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to "
cannam@0 244 << frameCount << ", allocating " << len << " for "
cannam@0 245 << frameCount - blockSize << std::endl;
cannam@0 246 */
cannam@0 247 distance[frameCount] = distance[frameCount - blockSize];
cannam@0 248
cannam@0 249 distance[frameCount - blockSize] = (unsigned char *)
cannam@0 250 malloc(len * sizeof(unsigned char));
cannam@0 251 for (int i = 0; i < len; ++i) {
cannam@0 252 distance[frameCount - blockSize][i] =
cannam@0 253 distance[frameCount][i];
cannam@0 254 }
cannam@0 255
cannam@0 256 bestPathCost[frameCount] = bestPathCost[frameCount - blockSize];
cannam@0 257
cannam@0 258 bestPathCost[frameCount - blockSize] = (int *)
cannam@0 259 malloc(len * sizeof(int));
cannam@0 260 for (int i = 0; i < len; ++i) {
cannam@0 261 bestPathCost[frameCount - blockSize][i] =
cannam@0 262 bestPathCost[frameCount][i];
cannam@0 263 }
cannam@0 264
cannam@0 265 distYSizes[frameCount] = distYSizes[frameCount - blockSize];
cannam@0 266 distYSizes[frameCount - blockSize] = len;
cannam@0 267 }
cannam@0 268
cannam@0 269 double totalEnergy = 0;
Chris@15 270 if (params.useSpectralDifference) {
cannam@0 271 for (int i = 0; i < freqMapSize; i++) {
cannam@0 272 totalEnergy += newFrame[i];
cannam@0 273 if (newFrame[i] > prevFrame[i]) {
cannam@0 274 frames[frameIndex][i] = newFrame[i] - prevFrame[i];
cannam@0 275 } else {
cannam@0 276 frames[frameIndex][i] = 0;
cannam@0 277 }
cannam@0 278 }
cannam@0 279 } else {
cannam@0 280 for (int i = 0; i < freqMapSize; i++) {
cannam@0 281 frames[frameIndex][i] = newFrame[i];
cannam@0 282 totalEnergy += frames[frameIndex][i];
cannam@0 283 }
cannam@0 284 }
Chris@13 285 totalEnergies[frameIndex] = totalEnergy;
cannam@0 286
cannam@0 287 double decay = frameCount >= 200 ? 0.99:
cannam@0 288 (frameCount < 100? 0: (frameCount - 100) / 100.0);
cannam@0 289
cannam@0 290 if (ltAverage == 0)
cannam@0 291 ltAverage = totalEnergy;
cannam@0 292 else
cannam@0 293 ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay);
cannam@0 294
Chris@15 295 if (rms <= params.silenceThreshold)
cannam@0 296 for (int i = 0; i < freqMapSize; i++)
cannam@0 297 frames[frameIndex][i] = 0;
Chris@15 298 else if (params.frameNorm == NormaliseFrameToSum1)
cannam@0 299 for (int i = 0; i < freqMapSize; i++)
cannam@0 300 frames[frameIndex][i] /= totalEnergy;
Chris@15 301 else if (params.frameNorm == NormaliseFrameToLTAverage)
cannam@0 302 for (int i = 0; i < freqMapSize; i++)
cannam@0 303 frames[frameIndex][i] /= ltAverage;
cannam@0 304
Chris@14 305 vector<double> processedFrame = frames[frameIndex];
Chris@14 306
cannam@0 307 int stop = otherMatcher->frameCount;
cannam@0 308 int index = stop - blockSize;
cannam@0 309 if (index < 0)
cannam@0 310 index = 0;
cannam@0 311 first[frameCount] = index;
cannam@0 312 last[frameCount] = stop;
cannam@0 313
cannam@0 314 bool overflow = false;
cannam@0 315 int mn= -1;
cannam@0 316 int mx= -1;
cannam@0 317 for ( ; index < stop; index++) {
cannam@0 318 int dMN = calcDistance(frames[frameIndex],
cannam@0 319 otherMatcher->frames[index % blockSize]);
cannam@0 320 if (mx<0)
cannam@0 321 mx = mn = dMN;
cannam@0 322 else if (dMN > mx)
cannam@0 323 mx = dMN;
cannam@0 324 else if (dMN < mn)
cannam@0 325 mn = dMN;
cannam@0 326 if (dMN >= 255) {
cannam@0 327 overflow = true;
cannam@0 328 dMN = 255;
cannam@0 329 }
cannam@0 330 if ((frameCount == 0) && (index == 0)) // first element
cannam@0 331 setValue(0, 0, 0, 0, dMN);
cannam@0 332 else if (frameCount == 0) // first row
cannam@0 333 setValue(0, index, ADVANCE_OTHER,
cannam@0 334 getValue(0, index-1, true), dMN);
cannam@0 335 else if (index == 0) // first column
cannam@0 336 setValue(frameCount, index, ADVANCE_THIS,
cannam@0 337 getValue(frameCount - 1, 0, true), dMN);
cannam@0 338 else if (index == otherMatcher->frameCount - blockSize) {
cannam@0 339 // missing value(s) due to cutoff
cannam@0 340 // - no previous value in current row (resp. column)
cannam@0 341 // - no diagonal value if prev. dir. == curr. dirn
cannam@0 342 int min2 = getValue(frameCount - 1, index, true);
cannam@0 343 // if ((firstPM && (first[frameCount - 1] == index)) ||
cannam@0 344 // (!firstPM && (last[index-1] < frameCount)))
cannam@0 345 if (first[frameCount - 1] == index)
cannam@0 346 setValue(frameCount, index, ADVANCE_THIS, min2, dMN);
cannam@0 347 else {
cannam@0 348 int min1 = getValue(frameCount - 1, index - 1, true);
cannam@0 349 if (min1 + dMN <= min2)
cannam@0 350 setValue(frameCount, index, ADVANCE_BOTH, min1,dMN);
cannam@0 351 else
cannam@0 352 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
cannam@0 353 }
cannam@0 354 } else {
cannam@0 355 int min1 = getValue(frameCount, index-1, true);
cannam@0 356 int min2 = getValue(frameCount - 1, index, true);
cannam@0 357 int min3 = getValue(frameCount - 1, index-1, true);
cannam@0 358 if (min1 <= min2) {
cannam@0 359 if (min3 + dMN <= min1)
cannam@0 360 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
cannam@0 361 else
cannam@0 362 setValue(frameCount, index, ADVANCE_OTHER,min1,dMN);
cannam@0 363 } else {
cannam@0 364 if (min3 + dMN <= min2)
cannam@0 365 setValue(frameCount, index, ADVANCE_BOTH, min3,dMN);
cannam@0 366 else
cannam@0 367 setValue(frameCount, index, ADVANCE_THIS, min2,dMN);
cannam@0 368 }
cannam@0 369 }
cannam@0 370 otherMatcher->last[index]++;
cannam@0 371 } // loop for row (resp. column)
cannam@0 372
cannam@0 373 vector<double> tmp = prevFrame;
cannam@0 374 prevFrame = newFrame;
cannam@0 375 newFrame = tmp;
cannam@0 376
cannam@0 377 frameCount++;
cannam@0 378 runCount++;
cannam@0 379
cannam@0 380 otherMatcher->runCount = 0;
cannam@0 381
cannam@0 382 if (overflow && !silent)
cannam@0 383 cerr << "WARNING: overflow in distance metric: "
cannam@0 384 << "frame " << frameCount << ", val = " << mx << endl;
cannam@0 385
cannam@0 386 if (!silent)
cannam@0 387 std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl;
cannam@0 388
cannam@0 389 if ((frameCount % 100) == 0) {
cannam@0 390 if (!silent) {
cannam@0 391 cerr << "Progress:" << frameCount << " " << ltAverage << endl;
cannam@0 392 }
cannam@0 393 }
Chris@14 394
Chris@14 395 return processedFrame;
cannam@0 396 } // processFrame()
cannam@0 397
cannam@0 398 int
cannam@0 399 Matcher::calcDistance(const vector<double> &f1, const vector<double> &f2)
cannam@0 400 {
cannam@0 401 double d = 0;
cannam@0 402 double sum = 0;
cannam@0 403 for (int i = 0; i < freqMapSize; i++) {
cannam@0 404 d += fabs(f1[i] - f2[i]);
cannam@0 405 sum += f1[i] + f2[i];
cannam@0 406 }
cannam@0 407 // System.err.print(" " + Format.d(d,3));
cannam@0 408 if (sum == 0)
cannam@0 409 return 0;
Chris@15 410 if (params.distanceNorm == NormaliseDistanceToSum)
cannam@0 411 return (int)(scale * d / sum); // 0 <= d/sum <= 2
Chris@15 412 if (params.distanceNorm != NormaliseDistanceToLogSum)
cannam@0 413 return (int)(scale * d);
Chris@13 414
Chris@13 415 // note if this were to be restored, it would have to use
Chris@13 416 // totalEnergies vector instead of f1[freqMapSize] which used to
Chris@13 417 // store the total energy:
cannam@0 418 // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0;
Chris@13 419
cannam@0 420 double weight = (8 + log(sum)) / 10.0;
cannam@0 421 // if (weight < mins) {
cannam@0 422 // mins = weight;
cannam@0 423 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
cannam@0 424 // }
cannam@0 425 // if (weight > maxs) {
cannam@0 426 // maxs = weight;
cannam@0 427 // System.err.println(Format.d(mins,3) + " " + Format.d(maxs));
cannam@0 428 // }
cannam@0 429 if (weight < 0)
cannam@0 430 weight = 0;
cannam@0 431 else if (weight > 1)
cannam@0 432 weight = 1;
cannam@0 433 return (int)(scale * d / sum * weight);
cannam@0 434 } // calcDistance()
cannam@0 435
cannam@0 436 int
cannam@0 437 Matcher::getValue(int i, int j, bool firstAttempt)
cannam@0 438 {
cannam@0 439 if (firstPM)
cannam@0 440 return bestPathCost[i][j - first[i]];
cannam@0 441 else
cannam@0 442 return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]];
cannam@0 443 } // getValue()
cannam@0 444
cannam@0 445 void
cannam@0 446 Matcher::setValue(int i, int j, int dir, int value, int dMN)
cannam@0 447 {
cannam@0 448 if (firstPM) {
cannam@0 449 distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir);
cannam@0 450 bestPathCost[i][j - first[i]] =
cannam@0 451 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
cannam@0 452 } else {
cannam@0 453 if (dir == ADVANCE_THIS)
cannam@0 454 dir = ADVANCE_OTHER;
cannam@0 455 else if (dir == ADVANCE_OTHER)
cannam@0 456 dir = ADVANCE_THIS;
cannam@0 457 int idx = i - otherMatcher->first[j];
cannam@0 458 if (idx == (int)otherMatcher->distYSizes[j]) {
cannam@0 459 // This should never happen, but if we allow arbitrary
cannam@0 460 // pauses in either direction, and arbitrary lengths at
cannam@0 461 // end, it is better than a segmentation fault.
cannam@0 462 std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl;
cannam@0 463 otherMatcher->distYSizes[j] = idx * 2;
cannam@0 464 otherMatcher->bestPathCost[j] =
cannam@0 465 (int *)realloc(otherMatcher->bestPathCost[j],
cannam@0 466 idx * 2 * sizeof(int));
cannam@0 467 otherMatcher->distance[j] =
cannam@0 468 (unsigned char *)realloc(otherMatcher->distance[j],
cannam@0 469 idx * 2 * sizeof(unsigned char));
cannam@0 470 }
cannam@0 471 otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir);
cannam@0 472 otherMatcher->bestPathCost[j][idx] =
cannam@0 473 (value + (dir==ADVANCE_BOTH? dMN*2: dMN));
cannam@0 474 }
cannam@0 475 } // setValue()
cannam@0 476