cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp feature extraction plugin using the MATCH audio alignment cannam@0: algorithm. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. cannam@0: This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. cannam@0: cannam@0: This program is free software; you can redistribute it and/or cannam@0: modify it under the terms of the GNU General Public License as cannam@0: published by the Free Software Foundation; either version 2 of the cannam@0: License, or (at your option) any later version. See the file cannam@0: COPYING included with this distribution for more information. cannam@0: */ cannam@0: cannam@0: #include "Matcher.h" cannam@0: cannam@0: #include cannam@0: cannam@4: #include Chris@16: #include cannam@4: cannam@0: bool Matcher::silent = true; cannam@0: Chris@10: //#define DEBUG_MATCHER 1 Chris@10: Chris@15: Matcher::Matcher(Parameters parameters, Matcher *p) : Chris@15: params(parameters) cannam@0: { Chris@10: #ifdef DEBUG_MATCHER Chris@15: cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl; Chris@10: #endif cannam@0: cannam@0: otherMatcher = p; // the first matcher will need this to be set later cannam@0: firstPM = (!p); cannam@0: ltAverage = 0; cannam@0: frameCount = 0; cannam@0: runCount = 0; cannam@0: blockSize = 0; cannam@0: scale = 90; cannam@0: Chris@15: blockSize = lrint(params.blockTime / params.hopTime); Chris@15: #ifdef DEBUG_MATCHER Chris@15: cerr << "Matcher: blockSize = " << blockSize << endl; Chris@15: #endif cannam@0: cannam@0: distance = 0; cannam@0: bestPathCost = 0; cannam@0: distYSizes = 0; cannam@0: distXSize = 0; cannam@0: cannam@0: initialised = false; cannam@0: cannam@0: } // default constructor cannam@0: cannam@0: Matcher::~Matcher() cannam@0: { Chris@10: #ifdef DEBUG_MATCHER Chris@15: cerr << "Matcher(" << this << ")::~Matcher()" << endl; Chris@10: #endif cannam@0: cannam@0: if (initialised) { cannam@0: cannam@0: for (int i = 0; i < distXSize; ++i) { cannam@0: if (distance[i]) { cannam@0: free(distance[i]); cannam@0: free(bestPathCost[i]); cannam@0: } cannam@0: } cannam@0: free(distance); cannam@0: free(bestPathCost); cannam@0: cannam@0: free(first); cannam@0: free(last); cannam@0: cannam@0: free(distYSizes); cannam@0: } cannam@0: } cannam@0: cannam@0: void cannam@0: Matcher::init() cannam@0: { cannam@0: if (initialised) return; cannam@0: cannam@0: initialised = true; cannam@0: Chris@16: freqMapSize = getFeatureSize(params); Chris@16: Chris@15: makeFreqMap(); cannam@0: cannam@0: initVector(prevFrame, freqMapSize); cannam@0: initVector(newFrame, freqMapSize); Chris@13: initMatrix(frames, blockSize, freqMapSize); Chris@13: initVector(totalEnergies, blockSize); cannam@0: Chris@15: int distSize = (params.maxRunCount + 1) * blockSize; cannam@0: cannam@0: distXSize = blockSize * 2; cannam@0: Chris@15: std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl; cannam@0: cannam@0: distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); cannam@0: bestPathCost = (int **)malloc(distXSize * sizeof(int *)); cannam@0: distYSizes = (int *)malloc(distXSize * sizeof(int)); cannam@0: cannam@0: for (int i = 0; i < blockSize; ++i) { cannam@0: distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); cannam@0: bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); cannam@0: distYSizes[i] = distSize; cannam@0: } cannam@0: for (int i = blockSize; i < distXSize; ++i) { cannam@0: distance[i] = 0; cannam@0: } cannam@0: cannam@0: first = (int *)malloc(distXSize * sizeof(int)); cannam@0: last = (int *)malloc(distXSize * sizeof(int)); cannam@0: cannam@0: frameCount = 0; cannam@0: runCount = 0; cannam@0: ltAverage = 0; cannam@0: cannam@0: } // init cannam@0: cannam@0: void Chris@15: Matcher::makeFreqMap() cannam@0: { Chris@15: initVector(freqMap, params.fftSize/2 + 1); Chris@15: if (params.useChromaFrequencyMap) { Chris@15: #ifdef DEBUG_MATCHER Chris@15: cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; Chris@15: #endif Chris@15: makeChromaFrequencyMap(); Chris@15: } else { Chris@15: #ifdef DEBUG_MATCHER Chris@15: cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; Chris@15: #endif Chris@15: makeStandardFrequencyMap(); Chris@15: } cannam@0: } // makeFreqMap() cannam@0: Chris@16: int Chris@16: Matcher::getFeatureSize(Parameters params) Chris@16: { Chris@16: if (params.useChromaFrequencyMap) { Chris@16: return 13; Chris@16: } else { Chris@16: return 84; Chris@16: } Chris@16: } Chris@16: cannam@0: void Chris@15: Matcher::makeStandardFrequencyMap() cannam@0: { Chris@15: double binWidth = params.sampleRate / params.fftSize; cannam@0: int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); cannam@7: int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ cannam@7: log(2.0) * 12 + 69); cannam@0: // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; cannam@0: int i = 0; cannam@0: while (i <= crossoverBin) { cannam@0: freqMap[i] = i; cannam@0: ++i; cannam@0: } Chris@15: while (i <= params.fftSize/2) { cannam@7: double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; Chris@16: if (midi > 127) midi = 127; cannam@0: freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; cannam@0: } Chris@16: assert(freqMapSize == freqMap[i-1] + 1); cannam@0: if (!silent) { cannam@0: cerr << "Standard map size: " << freqMapSize cannam@0: << "; Crossover at: " << crossoverBin << endl; Chris@15: for (i = 0; i < params.fftSize / 2; i++) Chris@15: cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; cannam@0: } cannam@0: } // makeStandardFrequencyMap() cannam@0: cannam@0: void Chris@15: Matcher::makeChromaFrequencyMap() cannam@0: { Chris@15: double binWidth = params.sampleRate / params.fftSize; cannam@0: int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); cannam@0: // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; cannam@0: int i = 0; cannam@0: while (i <= crossoverBin) cannam@0: freqMap[i++] = 0; Chris@15: while (i <= params.fftSize/2) { cannam@7: double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; cannam@0: freqMap[i++] = (lrint(midi)) % 12 + 1; cannam@0: } cannam@0: if (!silent) { cannam@0: cerr << "Chroma map size: " << freqMapSize cannam@0: << "; Crossover at: " << crossoverBin << endl; Chris@15: for (i = 0; i < params.fftSize / 2; i++) cannam@0: cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; cannam@0: } cannam@0: } // makeChromaFrequencyMap() cannam@0: Chris@14: vector cannam@0: Matcher::processFrame(double *reBuffer, double *imBuffer) cannam@0: { cannam@0: if (!initialised) init(); cannam@0: cannam@0: for (int i = 0; i < (int)newFrame.size(); ++i) { cannam@0: newFrame[i] = 0; cannam@0: } cannam@0: double rms = 0; Chris@15: for (int i = 0; i <= params.fftSize/2; i++) { cannam@0: double mag = reBuffer[i] * reBuffer[i] + cannam@0: imBuffer[i] * imBuffer[i]; cannam@0: rms += mag; cannam@0: newFrame[freqMap[i]] += mag; cannam@0: } Chris@15: rms = sqrt(rms / (params.fftSize/2)); cannam@0: cannam@0: int frameIndex = frameCount % blockSize; cannam@0: cannam@0: if (frameCount >= distXSize) { cannam@0: // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; cannam@0: distXSize *= 2; cannam@0: distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); cannam@0: bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); cannam@0: distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); cannam@0: first = (int *)realloc(first, distXSize * sizeof(int)); cannam@0: last = (int *)realloc(last, distXSize * sizeof(int)); cannam@0: cannam@0: for (int i = distXSize/2; i < distXSize; ++i) { cannam@0: distance[i] = 0; cannam@0: } cannam@0: } cannam@0: cannam@0: if (firstPM && (frameCount >= blockSize)) { cannam@0: cannam@0: int len = last[frameCount - blockSize] - cannam@0: first[frameCount - blockSize]; cannam@0: cannam@0: // We need to copy distance[frameCount-blockSize] to cannam@0: // distance[frameCount], and then truncate cannam@0: // distance[frameCount-blockSize] to its first len elements. cannam@0: // Same for bestPathCost. cannam@0: /* cannam@4: std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to " cannam@0: << frameCount << ", allocating " << len << " for " cannam@0: << frameCount - blockSize << std::endl; cannam@0: */ cannam@0: distance[frameCount] = distance[frameCount - blockSize]; cannam@0: cannam@0: distance[frameCount - blockSize] = (unsigned char *) cannam@0: malloc(len * sizeof(unsigned char)); cannam@0: for (int i = 0; i < len; ++i) { cannam@0: distance[frameCount - blockSize][i] = cannam@0: distance[frameCount][i]; cannam@0: } cannam@0: cannam@0: bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; cannam@0: cannam@0: bestPathCost[frameCount - blockSize] = (int *) cannam@0: malloc(len * sizeof(int)); cannam@0: for (int i = 0; i < len; ++i) { cannam@0: bestPathCost[frameCount - blockSize][i] = cannam@0: bestPathCost[frameCount][i]; cannam@0: } cannam@0: cannam@0: distYSizes[frameCount] = distYSizes[frameCount - blockSize]; cannam@0: distYSizes[frameCount - blockSize] = len; cannam@0: } cannam@0: cannam@0: double totalEnergy = 0; Chris@15: if (params.useSpectralDifference) { cannam@0: for (int i = 0; i < freqMapSize; i++) { cannam@0: totalEnergy += newFrame[i]; cannam@0: if (newFrame[i] > prevFrame[i]) { cannam@0: frames[frameIndex][i] = newFrame[i] - prevFrame[i]; cannam@0: } else { cannam@0: frames[frameIndex][i] = 0; cannam@0: } cannam@0: } cannam@0: } else { cannam@0: for (int i = 0; i < freqMapSize; i++) { cannam@0: frames[frameIndex][i] = newFrame[i]; cannam@0: totalEnergy += frames[frameIndex][i]; cannam@0: } cannam@0: } Chris@13: totalEnergies[frameIndex] = totalEnergy; cannam@0: cannam@0: double decay = frameCount >= 200 ? 0.99: cannam@0: (frameCount < 100? 0: (frameCount - 100) / 100.0); cannam@0: cannam@0: if (ltAverage == 0) cannam@0: ltAverage = totalEnergy; cannam@0: else cannam@0: ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); cannam@0: Chris@15: if (rms <= params.silenceThreshold) cannam@0: for (int i = 0; i < freqMapSize; i++) cannam@0: frames[frameIndex][i] = 0; Chris@15: else if (params.frameNorm == NormaliseFrameToSum1) cannam@0: for (int i = 0; i < freqMapSize; i++) cannam@0: frames[frameIndex][i] /= totalEnergy; Chris@15: else if (params.frameNorm == NormaliseFrameToLTAverage) cannam@0: for (int i = 0; i < freqMapSize; i++) cannam@0: frames[frameIndex][i] /= ltAverage; cannam@0: Chris@14: vector processedFrame = frames[frameIndex]; Chris@14: cannam@0: int stop = otherMatcher->frameCount; cannam@0: int index = stop - blockSize; cannam@0: if (index < 0) cannam@0: index = 0; cannam@0: first[frameCount] = index; cannam@0: last[frameCount] = stop; cannam@0: cannam@0: bool overflow = false; cannam@0: int mn= -1; cannam@0: int mx= -1; cannam@0: for ( ; index < stop; index++) { cannam@0: int dMN = calcDistance(frames[frameIndex], cannam@0: otherMatcher->frames[index % blockSize]); cannam@0: if (mx<0) cannam@0: mx = mn = dMN; cannam@0: else if (dMN > mx) cannam@0: mx = dMN; cannam@0: else if (dMN < mn) cannam@0: mn = dMN; cannam@0: if (dMN >= 255) { cannam@0: overflow = true; cannam@0: dMN = 255; cannam@0: } cannam@0: if ((frameCount == 0) && (index == 0)) // first element cannam@0: setValue(0, 0, 0, 0, dMN); cannam@0: else if (frameCount == 0) // first row cannam@0: setValue(0, index, ADVANCE_OTHER, cannam@0: getValue(0, index-1, true), dMN); cannam@0: else if (index == 0) // first column cannam@0: setValue(frameCount, index, ADVANCE_THIS, cannam@0: getValue(frameCount - 1, 0, true), dMN); cannam@0: else if (index == otherMatcher->frameCount - blockSize) { cannam@0: // missing value(s) due to cutoff cannam@0: // - no previous value in current row (resp. column) cannam@0: // - no diagonal value if prev. dir. == curr. dirn cannam@0: int min2 = getValue(frameCount - 1, index, true); cannam@0: // if ((firstPM && (first[frameCount - 1] == index)) || cannam@0: // (!firstPM && (last[index-1] < frameCount))) cannam@0: if (first[frameCount - 1] == index) cannam@0: setValue(frameCount, index, ADVANCE_THIS, min2, dMN); cannam@0: else { cannam@0: int min1 = getValue(frameCount - 1, index - 1, true); cannam@0: if (min1 + dMN <= min2) cannam@0: setValue(frameCount, index, ADVANCE_BOTH, min1,dMN); cannam@0: else cannam@0: setValue(frameCount, index, ADVANCE_THIS, min2,dMN); cannam@0: } cannam@0: } else { cannam@0: int min1 = getValue(frameCount, index-1, true); cannam@0: int min2 = getValue(frameCount - 1, index, true); cannam@0: int min3 = getValue(frameCount - 1, index-1, true); cannam@0: if (min1 <= min2) { cannam@0: if (min3 + dMN <= min1) cannam@0: setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); cannam@0: else cannam@0: setValue(frameCount, index, ADVANCE_OTHER,min1,dMN); cannam@0: } else { cannam@0: if (min3 + dMN <= min2) cannam@0: setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); cannam@0: else cannam@0: setValue(frameCount, index, ADVANCE_THIS, min2,dMN); cannam@0: } cannam@0: } cannam@0: otherMatcher->last[index]++; cannam@0: } // loop for row (resp. column) cannam@0: cannam@0: vector tmp = prevFrame; cannam@0: prevFrame = newFrame; cannam@0: newFrame = tmp; cannam@0: cannam@0: frameCount++; cannam@0: runCount++; cannam@0: cannam@0: otherMatcher->runCount = 0; cannam@0: cannam@0: if (overflow && !silent) cannam@0: cerr << "WARNING: overflow in distance metric: " cannam@0: << "frame " << frameCount << ", val = " << mx << endl; cannam@0: cannam@0: if (!silent) cannam@0: std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; cannam@0: cannam@0: if ((frameCount % 100) == 0) { cannam@0: if (!silent) { cannam@0: cerr << "Progress:" << frameCount << " " << ltAverage << endl; cannam@0: } cannam@0: } Chris@14: Chris@14: return processedFrame; cannam@0: } // processFrame() cannam@0: cannam@0: int cannam@0: Matcher::calcDistance(const vector &f1, const vector &f2) cannam@0: { cannam@0: double d = 0; cannam@0: double sum = 0; cannam@0: for (int i = 0; i < freqMapSize; i++) { cannam@0: d += fabs(f1[i] - f2[i]); cannam@0: sum += f1[i] + f2[i]; cannam@0: } cannam@0: // System.err.print(" " + Format.d(d,3)); cannam@0: if (sum == 0) cannam@0: return 0; Chris@15: if (params.distanceNorm == NormaliseDistanceToSum) cannam@0: return (int)(scale * d / sum); // 0 <= d/sum <= 2 Chris@15: if (params.distanceNorm != NormaliseDistanceToLogSum) cannam@0: return (int)(scale * d); Chris@13: Chris@13: // note if this were to be restored, it would have to use Chris@13: // totalEnergies vector instead of f1[freqMapSize] which used to Chris@13: // store the total energy: cannam@0: // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0; Chris@13: cannam@0: double weight = (8 + log(sum)) / 10.0; cannam@0: // if (weight < mins) { cannam@0: // mins = weight; cannam@0: // System.err.println(Format.d(mins,3) + " " + Format.d(maxs)); cannam@0: // } cannam@0: // if (weight > maxs) { cannam@0: // maxs = weight; cannam@0: // System.err.println(Format.d(mins,3) + " " + Format.d(maxs)); cannam@0: // } cannam@0: if (weight < 0) cannam@0: weight = 0; cannam@0: else if (weight > 1) cannam@0: weight = 1; cannam@0: return (int)(scale * d / sum * weight); cannam@0: } // calcDistance() cannam@0: cannam@0: int cannam@0: Matcher::getValue(int i, int j, bool firstAttempt) cannam@0: { cannam@0: if (firstPM) cannam@0: return bestPathCost[i][j - first[i]]; cannam@0: else cannam@0: return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]]; cannam@0: } // getValue() cannam@0: cannam@0: void cannam@0: Matcher::setValue(int i, int j, int dir, int value, int dMN) cannam@0: { cannam@0: if (firstPM) { cannam@0: distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir); cannam@0: bestPathCost[i][j - first[i]] = cannam@0: (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); cannam@0: } else { cannam@0: if (dir == ADVANCE_THIS) cannam@0: dir = ADVANCE_OTHER; cannam@0: else if (dir == ADVANCE_OTHER) cannam@0: dir = ADVANCE_THIS; cannam@0: int idx = i - otherMatcher->first[j]; cannam@0: if (idx == (int)otherMatcher->distYSizes[j]) { cannam@0: // This should never happen, but if we allow arbitrary cannam@0: // pauses in either direction, and arbitrary lengths at cannam@0: // end, it is better than a segmentation fault. cannam@0: std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; cannam@0: otherMatcher->distYSizes[j] = idx * 2; cannam@0: otherMatcher->bestPathCost[j] = cannam@0: (int *)realloc(otherMatcher->bestPathCost[j], cannam@0: idx * 2 * sizeof(int)); cannam@0: otherMatcher->distance[j] = cannam@0: (unsigned char *)realloc(otherMatcher->distance[j], cannam@0: idx * 2 * sizeof(unsigned char)); cannam@0: } cannam@0: otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); cannam@0: otherMatcher->bestPathCost[j][idx] = cannam@0: (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); cannam@0: } cannam@0: } // setValue() cannam@0: