annotate dsp/onsets/DetectionFunction.cpp @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents 6cb2b3cd5356
children e5907ae6de17
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 QM DSP Library
cannam@0 5
cannam@0 6 Centre for Digital Music, Queen Mary, University of London.
cannam@0 7 This file copyright 2005-2006 Christian Landone.
cannam@0 8 All rights reserved.
cannam@0 9 */
cannam@0 10
cannam@0 11 #include "DetectionFunction.h"
cannam@47 12 #include <cstring>
cannam@0 13
cannam@0 14 //////////////////////////////////////////////////////////////////////
cannam@0 15 // Construction/Destruction
cannam@0 16 //////////////////////////////////////////////////////////////////////
cannam@0 17
cannam@0 18 DetectionFunction::DetectionFunction( DFConfig Config ) :
cannam@0 19 m_window(0)
cannam@0 20 {
cannam@2 21 m_magHistory = NULL;
cannam@2 22 m_phaseHistory = NULL;
cannam@2 23 m_phaseHistoryOld = NULL;
cannam@14 24 m_magPeaks = NULL;
cannam@0 25
cannam@0 26 initialise( Config );
cannam@0 27 }
cannam@0 28
cannam@0 29 DetectionFunction::~DetectionFunction()
cannam@0 30 {
cannam@0 31 deInitialise();
cannam@0 32 }
cannam@0 33
cannam@0 34
cannam@0 35 void DetectionFunction::initialise( DFConfig Config )
cannam@0 36 {
cannam@0 37 m_dataLength = Config.frameLength;
cannam@0 38 m_halfLength = m_dataLength/2;
cannam@14 39
cannam@0 40 m_DFType = Config.DFType;
cannam@13 41 m_stepSize = Config.stepSize;
cannam@0 42
cannam@14 43 m_whiten = Config.adaptiveWhitening;
cannam@14 44 m_whitenRelaxCoeff = Config.whiteningRelaxCoeff;
cannam@14 45 m_whitenFloor = Config.whiteningFloor;
cannam@14 46 if (m_whitenRelaxCoeff < 0) m_whitenRelaxCoeff = 0.9997;
cannam@14 47 if (m_whitenFloor < 0) m_whitenFloor = 0.01;
cannam@14 48
cannam@2 49 m_magHistory = new double[ m_halfLength ];
cannam@2 50 memset(m_magHistory,0, m_halfLength*sizeof(double));
cannam@0 51
cannam@2 52 m_phaseHistory = new double[ m_halfLength ];
cannam@2 53 memset(m_phaseHistory,0, m_halfLength*sizeof(double));
cannam@0 54
cannam@2 55 m_phaseHistoryOld = new double[ m_halfLength ];
cannam@2 56 memset(m_phaseHistoryOld,0, m_halfLength*sizeof(double));
cannam@0 57
cannam@14 58 m_magPeaks = new double[ m_halfLength ];
cannam@14 59 memset(m_magPeaks,0, m_halfLength*sizeof(double));
cannam@14 60
cannam@64 61 // See note in process(const double *) below
cannam@64 62 int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
cannam@64 63 m_phaseVoc = new PhaseVocoder(actualLength);
cannam@0 64
cannam@0 65 m_DFWindowedFrame = new double[ m_dataLength ];
cannam@0 66 m_magnitude = new double[ m_halfLength ];
cannam@0 67 m_thetaAngle = new double[ m_halfLength ];
cannam@0 68
cannam@0 69 m_window = new Window<double>(HanningWindow, m_dataLength);
cannam@0 70 }
cannam@0 71
cannam@0 72 void DetectionFunction::deInitialise()
cannam@0 73 {
cannam@2 74 delete [] m_magHistory ;
cannam@2 75 delete [] m_phaseHistory ;
cannam@2 76 delete [] m_phaseHistoryOld ;
cannam@14 77 delete [] m_magPeaks ;
cannam@0 78
cannam@0 79 delete m_phaseVoc;
cannam@0 80
cannam@0 81 delete [] m_DFWindowedFrame;
cannam@0 82 delete [] m_magnitude;
cannam@0 83 delete [] m_thetaAngle;
cannam@0 84
cannam@0 85 delete m_window;
cannam@0 86 }
cannam@0 87
cannam@55 88 double DetectionFunction::process( const double *TDomain )
cannam@0 89 {
cannam@0 90 m_window->cut( TDomain, m_DFWindowedFrame );
cannam@55 91
cannam@55 92 // Our own FFT implementation supports power-of-two sizes only.
cannam@55 93 // If we have to use this implementation (as opposed to the
cannam@55 94 // version of process() below that operates on frequency domain
cannam@55 95 // data directly), we will have to use the next smallest power of
cannam@55 96 // two from the block size. Results may vary accordingly!
cannam@55 97
cannam@55 98 int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
cannam@55 99
cannam@55 100 if (actualLength != m_dataLength) {
cannam@55 101 // Pre-fill mag and phase vectors with zero, as the FFT output
cannam@55 102 // will not fill the arrays
cannam@55 103 for (int i = actualLength/2; i < m_dataLength/2; ++i) {
cannam@55 104 m_magnitude[i] = 0;
cannam@55 105 m_thetaAngle[0] = 0;
cannam@55 106 }
cannam@55 107 }
cannam@55 108
cannam@64 109 m_phaseVoc->process(m_DFWindowedFrame, m_magnitude, m_thetaAngle);
cannam@0 110
cannam@14 111 if (m_whiten) whiten();
cannam@14 112
cannam@2 113 return runDF();
cannam@2 114 }
cannam@2 115
cannam@55 116 double DetectionFunction::process( const double *magnitudes, const double *phases )
cannam@2 117 {
cannam@2 118 for (size_t i = 0; i < m_halfLength; ++i) {
cannam@2 119 m_magnitude[i] = magnitudes[i];
cannam@2 120 m_thetaAngle[i] = phases[i];
cannam@2 121 }
cannam@2 122
cannam@14 123 if (m_whiten) whiten();
cannam@14 124
cannam@2 125 return runDF();
cannam@2 126 }
cannam@2 127
cannam@14 128 void DetectionFunction::whiten()
cannam@14 129 {
cannam@14 130 for (unsigned int i = 0; i < m_halfLength; ++i) {
cannam@14 131 double m = m_magnitude[i];
cannam@14 132 if (m < m_magPeaks[i]) {
cannam@14 133 m = m + (m_magPeaks[i] - m) * m_whitenRelaxCoeff;
cannam@14 134 }
cannam@14 135 if (m < m_whitenFloor) m = m_whitenFloor;
cannam@14 136 m_magPeaks[i] = m;
cannam@14 137 m_magnitude[i] /= m;
cannam@14 138 }
cannam@14 139 }
cannam@14 140
cannam@2 141 double DetectionFunction::runDF()
cannam@2 142 {
cannam@2 143 double retVal = 0;
cannam@2 144
cannam@0 145 switch( m_DFType )
cannam@0 146 {
cannam@0 147 case DF_HFC:
cannam@0 148 retVal = HFC( m_halfLength, m_magnitude);
cannam@0 149 break;
cannam@0 150
cannam@13 151 case DF_SPECDIFF:
cannam@0 152 retVal = specDiff( m_halfLength, m_magnitude);
cannam@0 153 break;
cannam@0 154
cannam@0 155 case DF_PHASEDEV:
cannam@14 156 retVal = phaseDev( m_halfLength, m_thetaAngle);
cannam@0 157 break;
cannam@0 158
cannam@0 159 case DF_COMPLEXSD:
cannam@0 160 retVal = complexSD( m_halfLength, m_magnitude, m_thetaAngle);
cannam@0 161 break;
cannam@12 162
cannam@12 163 case DF_BROADBAND:
cannam@14 164 retVal = broadband( m_halfLength, m_magnitude);
cannam@14 165 break;
cannam@0 166 }
cannam@0 167
cannam@0 168 return retVal;
cannam@0 169 }
cannam@0 170
cannam@0 171 double DetectionFunction::HFC(unsigned int length, double *src)
cannam@0 172 {
cannam@0 173 unsigned int i;
cannam@0 174 double val = 0;
cannam@0 175
cannam@0 176 for( i = 0; i < length; i++)
cannam@0 177 {
cannam@0 178 val += src[ i ] * ( i + 1);
cannam@0 179 }
cannam@0 180 return val;
cannam@0 181 }
cannam@0 182
cannam@0 183 double DetectionFunction::specDiff(unsigned int length, double *src)
cannam@0 184 {
cannam@0 185 unsigned int i;
cannam@0 186 double val = 0.0;
cannam@0 187 double temp = 0.0;
cannam@0 188 double diff = 0.0;
cannam@0 189
cannam@0 190 for( i = 0; i < length; i++)
cannam@0 191 {
cannam@2 192 temp = fabs( (src[ i ] * src[ i ]) - (m_magHistory[ i ] * m_magHistory[ i ]) );
cannam@0 193
cannam@0 194 diff= sqrt(temp);
cannam@0 195
cannam@13 196 // (See note in phaseDev below.)
cannam@13 197
cannam@13 198 val += diff;
cannam@0 199
cannam@2 200 m_magHistory[ i ] = src[ i ];
cannam@0 201 }
cannam@0 202
cannam@0 203 return val;
cannam@0 204 }
cannam@0 205
cannam@0 206
cannam@14 207 double DetectionFunction::phaseDev(unsigned int length, double *srcPhase)
cannam@0 208 {
cannam@0 209 unsigned int i;
cannam@0 210 double tmpPhase = 0;
cannam@0 211 double tmpVal = 0;
cannam@0 212 double val = 0;
cannam@0 213
cannam@0 214 double dev = 0;
cannam@0 215
cannam@0 216 for( i = 0; i < length; i++)
cannam@0 217 {
cannam@2 218 tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
cannam@0 219 dev = MathUtilities::princarg( tmpPhase );
cannam@13 220
cannam@13 221 // A previous version of this code only counted the value here
cannam@13 222 // if the magnitude exceeded 0.1. My impression is that
cannam@13 223 // doesn't greatly improve the results for "loud" music (so
cannam@13 224 // long as the peak picker is reasonably sophisticated), but
cannam@13 225 // does significantly damage its ability to work with quieter
cannam@13 226 // music, so I'm removing it and counting the result always.
cannam@13 227 // Same goes for the spectral difference measure above.
cannam@0 228
cannam@13 229 tmpVal = fabs(dev);
cannam@13 230 val += tmpVal ;
cannam@0 231
cannam@2 232 m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
cannam@2 233 m_phaseHistory[ i ] = srcPhase[ i ];
cannam@0 234 }
cannam@0 235
cannam@0 236
cannam@0 237 return val;
cannam@0 238 }
cannam@0 239
cannam@0 240
cannam@0 241 double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, double *srcPhase)
cannam@0 242 {
cannam@0 243 unsigned int i;
cannam@0 244 double val = 0;
cannam@0 245 double tmpPhase = 0;
cannam@0 246 double tmpReal = 0;
cannam@0 247 double tmpImag = 0;
cannam@0 248
cannam@0 249 double dev = 0;
cannam@0 250 ComplexData meas = ComplexData( 0, 0 );
cannam@2 251 ComplexData j = ComplexData( 0, 1 );
cannam@0 252
cannam@0 253 for( i = 0; i < length; i++)
cannam@0 254 {
cannam@2 255 tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
cannam@0 256 dev= MathUtilities::princarg( tmpPhase );
cannam@0 257
cannam@2 258 meas = m_magHistory[i] - ( srcMagnitude[ i ] * exp( j * dev) );
cannam@0 259
cannam@0 260 tmpReal = real( meas );
cannam@0 261 tmpImag = imag( meas );
cannam@0 262
cannam@0 263 val += sqrt( (tmpReal * tmpReal) + (tmpImag * tmpImag) );
cannam@0 264
cannam@2 265 m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
cannam@2 266 m_phaseHistory[ i ] = srcPhase[ i ];
cannam@2 267 m_magHistory[ i ] = srcMagnitude[ i ];
cannam@0 268 }
cannam@0 269
cannam@0 270 return val;
cannam@0 271 }
cannam@0 272
cannam@14 273 double DetectionFunction::broadband(unsigned int length, double *src)
cannam@12 274 {
cannam@12 275 double val = 0;
cannam@12 276 for (unsigned int i = 0; i < length; ++i) {
cannam@14 277 double sqrmag = src[i] * src[i];
cannam@12 278 if (m_magHistory[i] > 0.0) {
cannam@12 279 double diff = 10.0 * log10(sqrmag / m_magHistory[i]);
cannam@12 280 if (diff > m_dbRise) val = val + 1;
cannam@12 281 }
cannam@12 282 m_magHistory[i] = sqrmag;
cannam@12 283 }
cannam@12 284 return val;
cannam@12 285 }
cannam@12 286
cannam@0 287 double* DetectionFunction::getSpectrumMagnitude()
cannam@0 288 {
cannam@0 289 return m_magnitude;
cannam@0 290 }
cannam@0 291