annotate dsp/onsets/DetectionFunction.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents befe5aa6b450
children e5907ae6de17
rev   line source
c@225 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@225 2
c@225 3 /*
c@225 4 QM DSP Library
c@225 5
c@225 6 Centre for Digital Music, Queen Mary, University of London.
c@225 7 This file copyright 2005-2006 Christian Landone.
c@225 8 All rights reserved.
c@225 9 */
c@225 10
c@225 11 #include "DetectionFunction.h"
c@272 12 #include <cstring>
c@225 13
c@225 14 //////////////////////////////////////////////////////////////////////
c@225 15 // Construction/Destruction
c@225 16 //////////////////////////////////////////////////////////////////////
c@225 17
c@225 18 DetectionFunction::DetectionFunction( DFConfig Config ) :
c@225 19 m_window(0)
c@225 20 {
c@227 21 m_magHistory = NULL;
c@227 22 m_phaseHistory = NULL;
c@227 23 m_phaseHistoryOld = NULL;
c@239 24 m_magPeaks = NULL;
c@225 25
c@225 26 initialise( Config );
c@225 27 }
c@225 28
c@225 29 DetectionFunction::~DetectionFunction()
c@225 30 {
c@225 31 deInitialise();
c@225 32 }
c@225 33
c@225 34
c@225 35 void DetectionFunction::initialise( DFConfig Config )
c@225 36 {
c@225 37 m_dataLength = Config.frameLength;
c@225 38 m_halfLength = m_dataLength/2;
c@239 39
c@225 40 m_DFType = Config.DFType;
c@238 41 m_stepSize = Config.stepSize;
c@225 42
c@239 43 m_whiten = Config.adaptiveWhitening;
c@239 44 m_whitenRelaxCoeff = Config.whiteningRelaxCoeff;
c@239 45 m_whitenFloor = Config.whiteningFloor;
c@239 46 if (m_whitenRelaxCoeff < 0) m_whitenRelaxCoeff = 0.9997;
c@239 47 if (m_whitenFloor < 0) m_whitenFloor = 0.01;
c@239 48
c@227 49 m_magHistory = new double[ m_halfLength ];
c@227 50 memset(m_magHistory,0, m_halfLength*sizeof(double));
c@225 51
c@227 52 m_phaseHistory = new double[ m_halfLength ];
c@227 53 memset(m_phaseHistory,0, m_halfLength*sizeof(double));
c@225 54
c@227 55 m_phaseHistoryOld = new double[ m_halfLength ];
c@227 56 memset(m_phaseHistoryOld,0, m_halfLength*sizeof(double));
c@225 57
c@239 58 m_magPeaks = new double[ m_halfLength ];
c@239 59 memset(m_magPeaks,0, m_halfLength*sizeof(double));
c@239 60
c@289 61 // See note in process(const double *) below
c@289 62 int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
c@289 63 m_phaseVoc = new PhaseVocoder(actualLength);
c@225 64
c@225 65 m_DFWindowedFrame = new double[ m_dataLength ];
c@225 66 m_magnitude = new double[ m_halfLength ];
c@225 67 m_thetaAngle = new double[ m_halfLength ];
c@225 68
c@225 69 m_window = new Window<double>(HanningWindow, m_dataLength);
c@225 70 }
c@225 71
c@225 72 void DetectionFunction::deInitialise()
c@225 73 {
c@227 74 delete [] m_magHistory ;
c@227 75 delete [] m_phaseHistory ;
c@227 76 delete [] m_phaseHistoryOld ;
c@239 77 delete [] m_magPeaks ;
c@225 78
c@225 79 delete m_phaseVoc;
c@225 80
c@225 81 delete [] m_DFWindowedFrame;
c@225 82 delete [] m_magnitude;
c@225 83 delete [] m_thetaAngle;
c@225 84
c@225 85 delete m_window;
c@225 86 }
c@225 87
c@280 88 double DetectionFunction::process( const double *TDomain )
c@225 89 {
c@225 90 m_window->cut( TDomain, m_DFWindowedFrame );
c@280 91
c@280 92 // Our own FFT implementation supports power-of-two sizes only.
c@280 93 // If we have to use this implementation (as opposed to the
c@280 94 // version of process() below that operates on frequency domain
c@280 95 // data directly), we will have to use the next smallest power of
c@280 96 // two from the block size. Results may vary accordingly!
c@280 97
c@280 98 int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
c@280 99
c@280 100 if (actualLength != m_dataLength) {
c@280 101 // Pre-fill mag and phase vectors with zero, as the FFT output
c@280 102 // will not fill the arrays
c@280 103 for (int i = actualLength/2; i < m_dataLength/2; ++i) {
c@280 104 m_magnitude[i] = 0;
c@280 105 m_thetaAngle[0] = 0;
c@280 106 }
c@280 107 }
c@280 108
c@289 109 m_phaseVoc->process(m_DFWindowedFrame, m_magnitude, m_thetaAngle);
c@225 110
c@239 111 if (m_whiten) whiten();
c@239 112
c@227 113 return runDF();
c@227 114 }
c@227 115
c@280 116 double DetectionFunction::process( const double *magnitudes, const double *phases )
c@227 117 {
c@227 118 for (size_t i = 0; i < m_halfLength; ++i) {
c@227 119 m_magnitude[i] = magnitudes[i];
c@227 120 m_thetaAngle[i] = phases[i];
c@227 121 }
c@227 122
c@239 123 if (m_whiten) whiten();
c@239 124
c@227 125 return runDF();
c@227 126 }
c@227 127
c@239 128 void DetectionFunction::whiten()
c@239 129 {
c@239 130 for (unsigned int i = 0; i < m_halfLength; ++i) {
c@239 131 double m = m_magnitude[i];
c@239 132 if (m < m_magPeaks[i]) {
c@239 133 m = m + (m_magPeaks[i] - m) * m_whitenRelaxCoeff;
c@239 134 }
c@239 135 if (m < m_whitenFloor) m = m_whitenFloor;
c@239 136 m_magPeaks[i] = m;
c@239 137 m_magnitude[i] /= m;
c@239 138 }
c@239 139 }
c@239 140
c@227 141 double DetectionFunction::runDF()
c@227 142 {
c@227 143 double retVal = 0;
c@227 144
c@225 145 switch( m_DFType )
c@225 146 {
c@225 147 case DF_HFC:
c@225 148 retVal = HFC( m_halfLength, m_magnitude);
c@225 149 break;
c@225 150
c@238 151 case DF_SPECDIFF:
c@225 152 retVal = specDiff( m_halfLength, m_magnitude);
c@225 153 break;
c@225 154
c@225 155 case DF_PHASEDEV:
c@239 156 retVal = phaseDev( m_halfLength, m_thetaAngle);
c@225 157 break;
c@225 158
c@225 159 case DF_COMPLEXSD:
c@225 160 retVal = complexSD( m_halfLength, m_magnitude, m_thetaAngle);
c@225 161 break;
c@237 162
c@237 163 case DF_BROADBAND:
c@239 164 retVal = broadband( m_halfLength, m_magnitude);
c@239 165 break;
c@225 166 }
c@225 167
c@225 168 return retVal;
c@225 169 }
c@225 170
c@225 171 double DetectionFunction::HFC(unsigned int length, double *src)
c@225 172 {
c@225 173 unsigned int i;
c@225 174 double val = 0;
c@225 175
c@225 176 for( i = 0; i < length; i++)
c@225 177 {
c@225 178 val += src[ i ] * ( i + 1);
c@225 179 }
c@225 180 return val;
c@225 181 }
c@225 182
c@225 183 double DetectionFunction::specDiff(unsigned int length, double *src)
c@225 184 {
c@225 185 unsigned int i;
c@225 186 double val = 0.0;
c@225 187 double temp = 0.0;
c@225 188 double diff = 0.0;
c@225 189
c@225 190 for( i = 0; i < length; i++)
c@225 191 {
c@227 192 temp = fabs( (src[ i ] * src[ i ]) - (m_magHistory[ i ] * m_magHistory[ i ]) );
c@225 193
c@225 194 diff= sqrt(temp);
c@225 195
c@238 196 // (See note in phaseDev below.)
c@238 197
c@238 198 val += diff;
c@225 199
c@227 200 m_magHistory[ i ] = src[ i ];
c@225 201 }
c@225 202
c@225 203 return val;
c@225 204 }
c@225 205
c@225 206
c@239 207 double DetectionFunction::phaseDev(unsigned int length, double *srcPhase)
c@225 208 {
c@225 209 unsigned int i;
c@225 210 double tmpPhase = 0;
c@225 211 double tmpVal = 0;
c@225 212 double val = 0;
c@225 213
c@225 214 double dev = 0;
c@225 215
c@225 216 for( i = 0; i < length; i++)
c@225 217 {
c@227 218 tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
c@225 219 dev = MathUtilities::princarg( tmpPhase );
c@238 220
c@238 221 // A previous version of this code only counted the value here
c@238 222 // if the magnitude exceeded 0.1. My impression is that
c@238 223 // doesn't greatly improve the results for "loud" music (so
c@238 224 // long as the peak picker is reasonably sophisticated), but
c@238 225 // does significantly damage its ability to work with quieter
c@238 226 // music, so I'm removing it and counting the result always.
c@238 227 // Same goes for the spectral difference measure above.
c@225 228
c@238 229 tmpVal = fabs(dev);
c@238 230 val += tmpVal ;
c@225 231
c@227 232 m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
c@227 233 m_phaseHistory[ i ] = srcPhase[ i ];
c@225 234 }
c@225 235
c@225 236
c@225 237 return val;
c@225 238 }
c@225 239
c@225 240
c@225 241 double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, double *srcPhase)
c@225 242 {
c@225 243 unsigned int i;
c@225 244 double val = 0;
c@225 245 double tmpPhase = 0;
c@225 246 double tmpReal = 0;
c@225 247 double tmpImag = 0;
c@225 248
c@225 249 double dev = 0;
c@225 250 ComplexData meas = ComplexData( 0, 0 );
c@227 251 ComplexData j = ComplexData( 0, 1 );
c@225 252
c@225 253 for( i = 0; i < length; i++)
c@225 254 {
c@227 255 tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
c@225 256 dev= MathUtilities::princarg( tmpPhase );
c@225 257
c@227 258 meas = m_magHistory[i] - ( srcMagnitude[ i ] * exp( j * dev) );
c@225 259
c@225 260 tmpReal = real( meas );
c@225 261 tmpImag = imag( meas );
c@225 262
c@225 263 val += sqrt( (tmpReal * tmpReal) + (tmpImag * tmpImag) );
c@225 264
c@227 265 m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
c@227 266 m_phaseHistory[ i ] = srcPhase[ i ];
c@227 267 m_magHistory[ i ] = srcMagnitude[ i ];
c@225 268 }
c@225 269
c@225 270 return val;
c@225 271 }
c@225 272
c@239 273 double DetectionFunction::broadband(unsigned int length, double *src)
c@237 274 {
c@237 275 double val = 0;
c@237 276 for (unsigned int i = 0; i < length; ++i) {
c@239 277 double sqrmag = src[i] * src[i];
c@237 278 if (m_magHistory[i] > 0.0) {
c@237 279 double diff = 10.0 * log10(sqrmag / m_magHistory[i]);
c@237 280 if (diff > m_dbRise) val = val + 1;
c@237 281 }
c@237 282 m_magHistory[i] = sqrmag;
c@237 283 }
c@237 284 return val;
c@237 285 }
c@237 286
c@225 287 double* DetectionFunction::getSpectrumMagnitude()
c@225 288 {
c@225 289 return m_magnitude;
c@225 290 }
c@225 291