annotate dsp/tonal/ChangeDetectionFunction.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents cded679e12c2
children e5907ae6de17
rev   line source
c@225 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@225 2
c@225 3 /*
c@225 4 QM DSP Library
c@225 5
c@225 6 Centre for Digital Music, Queen Mary, University of London.
c@225 7 This file copyright 2006 Martin Gasser.
c@225 8 All rights reserved.
c@225 9 */
c@225 10
c@225 11 #include "ChangeDetectionFunction.h"
c@225 12
// Fallback definition of pi, used only when no PI macro is already visible.
// The digits must read ...979323846; a transposed pair here shifts the
// resulting double by two ulps.
#ifndef PI
#define PI (3.14159265358979323846)
#endif
c@225 16
c@225 17
c@225 18
c@225 19 ChangeDetectionFunction::ChangeDetectionFunction(ChangeDFConfig config) :
c@225 20 m_dFilterSigma(0.0), m_iFilterWidth(0)
c@225 21 {
c@225 22 setFilterWidth(config.smoothingWidth);
c@225 23 }
c@225 24
c@225 25 ChangeDetectionFunction::~ChangeDetectionFunction()
c@225 26 {
c@225 27 }
c@225 28
c@225 29 void ChangeDetectionFunction::setFilterWidth(const int iWidth)
c@225 30 {
c@225 31 m_iFilterWidth = iWidth*2+1;
c@225 32
c@225 33 // it is assumed that the gaussian is 0 outside of +/- FWHM
c@225 34 // => filter width = 2*FWHM = 2*2.3548*sigma
c@225 35 m_dFilterSigma = double(m_iFilterWidth) / double(2*2.3548);
c@225 36 m_vaGaussian.resize(m_iFilterWidth);
c@225 37
c@225 38 double dScale = 1.0 / (m_dFilterSigma*sqrt(2*PI));
c@225 39
c@225 40 for (int x = -(m_iFilterWidth-1)/2; x <= (m_iFilterWidth-1)/2; x++)
c@225 41 {
c@225 42 double w = dScale * std::exp ( -(x*x)/(2*m_dFilterSigma*m_dFilterSigma) );
c@225 43 m_vaGaussian[x + (m_iFilterWidth-1)/2] = w;
c@225 44 }
c@225 45
c@225 46 #ifdef DEBUG_CHANGE_DETECTION_FUNCTION
c@275 47 std::cerr << "Filter sigma: " << m_dFilterSigma << std::endl;
c@275 48 std::cerr << "Filter width: " << m_iFilterWidth << std::endl;
c@225 49 #endif
c@225 50 }
c@225 51
c@225 52
c@225 53 ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
c@225 54 {
c@225 55 ChangeDistance retVal;
c@225 56 retVal.resize(rTCSGram.getSize(), 0.0);
c@225 57
c@225 58 TCSGram smoothedTCSGram;
c@225 59
c@225 60 for (int iPosition = 0; iPosition < rTCSGram.getSize(); iPosition++)
c@225 61 {
c@225 62 int iSkipLower = 0;
c@225 63
c@225 64 int iLowerPos = iPosition - (m_iFilterWidth-1)/2;
c@225 65 int iUpperPos = iPosition + (m_iFilterWidth-1)/2;
c@225 66
c@225 67 if (iLowerPos < 0)
c@225 68 {
c@225 69 iSkipLower = -iLowerPos;
c@225 70 iLowerPos = 0;
c@225 71 }
c@225 72
c@225 73 if (iUpperPos >= rTCSGram.getSize())
c@225 74 {
c@225 75 int iMaxIndex = rTCSGram.getSize() - 1;
c@225 76 iUpperPos = iMaxIndex;
c@225 77 }
c@225 78
c@225 79 TCSVector smoothedVector;
c@225 80
c@225 81 // for every bin of the vector, calculate the smoothed value
c@225 82 for (int iPC = 0; iPC < 6; iPC++)
c@225 83 {
c@225 84 size_t j = 0;
c@225 85 double dSmoothedValue = 0.0;
c@225 86 TCSVector rCV;
c@225 87
c@225 88 for (int i = iLowerPos; i <= iUpperPos; i++)
c@225 89 {
c@225 90 rTCSGram.getTCSVector(i, rCV);
c@225 91 dSmoothedValue += m_vaGaussian[iSkipLower + j++] * rCV[iPC];
c@225 92 }
c@225 93
c@225 94 smoothedVector[iPC] = dSmoothedValue;
c@225 95 }
c@225 96
c@225 97 smoothedTCSGram.addTCSVector(smoothedVector);
c@225 98 }
c@225 99
c@225 100 for (int iPosition = 0; iPosition < rTCSGram.getSize(); iPosition++)
c@225 101 {
c@225 102 /*
c@225 103 TODO: calculate a confidence measure for the current estimation
c@225 104 if the current estimate is not confident enough, look further into the future/the past
c@225 105 e.g., High frequency content, zero crossing rate, spectral flatness
c@225 106 */
c@225 107
c@225 108 TCSVector nextTCS;
c@225 109 TCSVector previousTCS;
c@225 110
c@225 111 int iWindow = 1;
c@225 112
c@225 113 // while (previousTCS.magnitude() < 0.1 && (iPosition-iWindow) > 0)
c@225 114 {
c@225 115 smoothedTCSGram.getTCSVector(iPosition-iWindow, previousTCS);
c@225 116 // std::cout << previousTCS.magnitude() << std::endl;
c@225 117 iWindow++;
c@225 118 }
c@225 119
c@225 120 iWindow = 1;
c@225 121
c@225 122 // while (nextTCS.magnitude() < 0.1 && (iPosition+iWindow) < (rTCSGram.getSize()-1) )
c@225 123 {
c@225 124 smoothedTCSGram.getTCSVector(iPosition+iWindow, nextTCS);
c@225 125 iWindow++;
c@225 126 }
c@225 127
c@225 128 double distance = 0.0;
c@225 129 // Euclidean distance
c@225 130 for (size_t j = 0; j < 6; j++)
c@225 131 {
c@225 132 distance += std::pow(nextTCS[j] - previousTCS[j], 2.0);
c@225 133 }
c@225 134
c@225 135 retVal[iPosition] = std::pow(distance, 0.5);
c@225 136 }
c@225 137
c@225 138 return retVal;
c@225 139 }