annotate dsp/keydetection/GetKeyMode.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents 5e125f030287
children 715f779d0b4f
rev   line source
c@259 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@259 2
c@232 3 // GetKeyMode.cpp: implementation of the CGetKeyMode class.
c@232 4 //
c@232 5 //////////////////////////////////////////////////////////////////////
c@232 6
c@232 7 #include "GetKeyMode.h"
c@241 8 #include "maths/MathUtilities.h"
c@234 9 #include "base/Pitch.h"
c@234 10
c@234 11 #include <iostream>
c@232 12
c@272 13 #include <cstring>
c@272 14 #include <cstdlib>
c@272 15
// Key profiles correlated against the averaged chromagram.
//
// Each table holds 36 values, laid out here as 12 rows of 3 bins
// (the detector works with 36 bins per octave and groups them in
// threes).  The tables are deliberately left non-const: they are
// handed to helpers that take plain double* arguments.

// Major profile
static double MajProfile[36] = {
    0.0384, 0.0629, 0.0258,
    0.0121, 0.0146, 0.0106,
    0.0364, 0.0610, 0.0267,
    0.0126, 0.0121, 0.0086,
    0.0364, 0.0623, 0.0279,
    0.0275, 0.0414, 0.0186,
    0.0173, 0.0248, 0.0145,
    0.0364, 0.0631, 0.0262,
    0.0129, 0.0150, 0.0098,
    0.0312, 0.0521, 0.0235,
    0.0129, 0.0142, 0.0095,
    0.0289, 0.0478, 0.0239
};

// Minor profile
static double MinProfile[36] = {
    0.0375, 0.0682, 0.0299,
    0.0119, 0.0138, 0.0093,
    0.0296, 0.0543, 0.0257,
    0.0292, 0.0519, 0.0246,
    0.0159, 0.0234, 0.0135,
    0.0291, 0.0544, 0.0248,
    0.0137, 0.0176, 0.0104,
    0.0352, 0.0670, 0.0302,
    0.0222, 0.0349, 0.0164,
    0.0174, 0.0297, 0.0166,
    0.0222, 0.0401, 0.0202,
    0.0175, 0.0270, 0.0146
};
c@232 28 //
c@265 29
c@232 30
c@232 31 //////////////////////////////////////////////////////////////////////
c@232 32 // Construction/Destruction
c@232 33 //////////////////////////////////////////////////////////////////////
c@232 34
c@234 35 GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
c@259 36 double hpcpAverage, double medianAverage ) :
c@259 37 m_hpcpAverage( hpcpAverage ),
c@259 38 m_medianAverage( medianAverage ),
c@259 39 m_ChrPointer(0),
c@259 40 m_DecimatedBuffer(0),
c@259 41 m_ChromaBuffer(0),
c@259 42 m_MeanHPCP(0),
c@259 43 m_MajCorr(0),
c@259 44 m_MinCorr(0),
c@259 45 m_Keys(0),
c@259 46 m_MedianFilterBuffer(0),
c@265 47 m_SortedBuffer(0),
c@265 48 m_keyStrengths(0)
c@232 49 {
c@259 50 m_DecimationFactor = 8;
c@259 51
c@259 52 // Chromagram configuration parameters
c@259 53 m_ChromaConfig.normalise = MathUtilities::NormaliseUnitMax;
c@259 54 m_ChromaConfig.FS = lrint(sampleRate/(double)m_DecimationFactor);
c@283 55 if (m_ChromaConfig.FS < 1) m_ChromaConfig.FS = 1;
c@232 56
c@259 57 // Set C (= MIDI #12) as our base :
c@259 58 // This implies that key = 1 => Cmaj, key = 12 => Bmaj, key = 13 => Cmin, etc.
c@259 59 m_ChromaConfig.min = Pitch::getFrequencyForPitch
c@265 60 (48, 0, tuningFrequency);
c@259 61 m_ChromaConfig.max = Pitch::getFrequencyForPitch
c@259 62 (96, 0, tuningFrequency);
c@234 63
c@259 64 m_ChromaConfig.BPO = 36;
c@259 65 m_ChromaConfig.CQThresh = 0.0054;
c@234 66
c@259 67 // Chromagram inst.
c@259 68 m_Chroma = new Chromagram( m_ChromaConfig );
c@235 69
c@259 70 // Get calculated parameters from chroma object
c@259 71 m_ChromaFrameSize = m_Chroma->getFrameSize();
c@259 72 // override hopsize for this application
c@259 73 m_ChromaHopSize = m_ChromaFrameSize;
c@259 74 m_BPO = m_ChromaConfig.BPO;
c@235 75
c@265 76 // std::cerr << "chroma frame size = " << m_ChromaFrameSize << ", decimation factor = " << m_DecimationFactor << " therefore block size = " << getBlockSize() << std::endl;
c@265 77
c@259 78 // Chromagram average and estimated key median filter lengths
c@259 79 m_ChromaBuffersize = (int)ceil( m_hpcpAverage * m_ChromaConfig.FS/m_ChromaFrameSize );
c@259 80 m_MedianWinsize = (int)ceil( m_medianAverage * m_ChromaConfig.FS/m_ChromaFrameSize );
c@259 81
c@259 82 // Reset counters
c@259 83 m_bufferindex = 0;
c@259 84 m_ChromaBufferFilling = 0;
c@259 85 m_MedianBufferFilling = 0;
c@234 86
c@259 87 // Spawn objectc/arrays
c@259 88 m_DecimatedBuffer = new double[m_ChromaFrameSize];
c@259 89
c@259 90 m_ChromaBuffer = new double[m_BPO * m_ChromaBuffersize];
c@259 91 memset( m_ChromaBuffer, 0, sizeof(double) * m_BPO * m_ChromaBuffersize);
c@259 92
c@259 93 m_MeanHPCP = new double[m_BPO];
c@259 94
c@259 95 m_MajCorr = new double[m_BPO];
c@259 96 m_MinCorr = new double[m_BPO];
c@259 97 m_Keys = new double[2*m_BPO];
c@259 98
c@259 99 m_MedianFilterBuffer = new int[ m_MedianWinsize ];
c@259 100 memset( m_MedianFilterBuffer, 0, sizeof(int)*m_MedianWinsize);
c@259 101
c@259 102 m_SortedBuffer = new int[ m_MedianWinsize ];
c@259 103 memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize);
c@259 104
c@259 105 m_Decimator = new Decimator
c@259 106 ( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor );
c@265 107
c@265 108 m_keyStrengths = new double[24];
c@232 109 }
c@232 110
c@232 111 GetKeyMode::~GetKeyMode()
c@232 112 {
c@232 113
c@259 114 delete m_Chroma;
c@259 115 delete m_Decimator;
c@259 116
c@259 117 delete [] m_DecimatedBuffer;
c@259 118 delete [] m_ChromaBuffer;
c@259 119 delete [] m_MeanHPCP;
c@259 120 delete [] m_MajCorr;
c@259 121 delete [] m_MinCorr;
c@259 122 delete [] m_Keys;
c@259 123 delete [] m_MedianFilterBuffer;
c@259 124 delete [] m_SortedBuffer;
c@265 125
c@265 126 delete[] m_keyStrengths;
c@232 127 }
c@232 128
c@232 129 double GetKeyMode::krumCorr(double *pData1, double *pData2, unsigned int length)
c@232 130 {
c@259 131 double retVal= 0.0;
c@259 132
c@259 133 double num = 0;
c@259 134 double den = 0;
c@259 135 double mX = MathUtilities::mean( pData1, length );
c@259 136 double mY = MathUtilities::mean( pData2, length );
c@259 137
c@259 138 double sum1 = 0;
c@259 139 double sum2 = 0;
c@259 140
c@259 141 for( unsigned int i = 0; i <length; i++ )
c@259 142 {
c@259 143 num += ( pData1[i] - mX ) * ( pData2[i] - mY );
c@232 144
c@259 145 sum1 += ( (pData1[i]-mX) * (pData1[i]-mX) );
c@259 146 sum2 += ( (pData2[i]-mY) * (pData2[i]-mY) );
c@259 147 }
c@259 148
c@259 149 den = sqrt(sum1 * sum2);
c@259 150
c@259 151 if( den>0 )
c@259 152 retVal = num/den;
c@259 153 else
c@259 154 retVal = 0;
c@232 155
c@232 156
c@259 157 return retVal;
c@232 158 }
c@232 159
c@232 160 int GetKeyMode::process(double *PCMData)
c@232 161 {
c@259 162 int key;
c@232 163
c@259 164 unsigned int j,k;
c@232 165
c@259 166 //////////////////////////////////////////////
c@259 167 m_Decimator->process( PCMData, m_DecimatedBuffer);
c@232 168
c@259 169 m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );
c@232 170
devnull@240 171
c@259 172 // Move bins such that the centre of the base note is in the
c@259 173 // middle of its three bins :
c@259 174 // Added 21.11.07 by Chris Sutton based on debugging with Katy
c@259 175 // Noland + comparison with Matlab equivalent.
c@259 176 MathUtilities::circShift( m_ChrPointer, m_BPO, 1);
c@248 177 /*
c@298 178 std::cout << "raw chroma: ";
c@298 179 for (int ii = 0; ii < m_BPO; ++ii) {
c@298 180 if (ii % (m_BPO/12) == 0) std::cout << "\n";
c@298 181 std::cout << m_ChrPointer[ii] << " ";
c@298 182 }
c@298 183 std::cout << std::endl;
c@248 184 */
c@259 185 // populate hpcp values;
c@259 186 int cbidx;
c@259 187 for( j = 0; j < m_BPO; j++ )
c@259 188 {
c@259 189 cbidx = (m_bufferindex * m_BPO) + j;
c@259 190 m_ChromaBuffer[ cbidx ] = m_ChrPointer[j];
c@259 191 }
c@232 192
c@259 193 //keep track of input buffers;
c@259 194 if( m_bufferindex++ >= m_ChromaBuffersize - 1)
c@259 195 m_bufferindex = 0;
c@232 196
c@259 197 // track filling of chroma matrix
c@259 198 if( m_ChromaBufferFilling++ >= m_ChromaBuffersize)
c@259 199 m_ChromaBufferFilling = m_ChromaBuffersize;
c@232 200
c@259 201 //calculate mean
c@259 202 for( k = 0; k < m_BPO; k++ )
c@259 203 {
c@259 204 double mnVal = 0.0;
c@259 205 for( j = 0; j < m_ChromaBufferFilling; j++ )
c@259 206 {
c@259 207 mnVal += m_ChromaBuffer[ k + (j*m_BPO) ];
c@259 208 }
c@232 209
c@259 210 m_MeanHPCP[k] = mnVal/(double)m_ChromaBufferFilling;
c@259 211 }
c@232 212
c@232 213
c@259 214 for( k = 0; k < m_BPO; k++ )
c@259 215 {
c@259 216 m_MajCorr[k] = krumCorr( m_MeanHPCP, MajProfile, m_BPO );
c@259 217 m_MinCorr[k] = krumCorr( m_MeanHPCP, MinProfile, m_BPO );
c@232 218
c@259 219 MathUtilities::circShift( MajProfile, m_BPO, 1 );
c@259 220 MathUtilities::circShift( MinProfile, m_BPO, 1 );
c@259 221 }
devnull@240 222
c@259 223 for( k = 0; k < m_BPO; k++ )
c@259 224 {
c@259 225 m_Keys[k] = m_MajCorr[k];
c@259 226 m_Keys[k+m_BPO] = m_MinCorr[k];
c@259 227 }
devnull@240 228
c@265 229 for (k = 0; k < 24; ++k) {
c@265 230 m_keyStrengths[k] = 0;
c@265 231 }
c@265 232
c@265 233 for( k = 0; k < m_BPO*2; k++ )
c@265 234 {
c@298 235 int idx = k / (m_BPO/12);
c@298 236 int rem = k % (m_BPO/12);
c@298 237 if (rem == 0 || m_Keys[k] > m_keyStrengths[idx]) {
c@298 238 m_keyStrengths[idx] = m_Keys[k];
c@298 239 }
c@298 240
c@298 241 // m_keyStrengths[k/(m_BPO/12)] += m_Keys[k];
c@265 242 }
devnull@240 243
c@248 244 /*
c@259 245 std::cout << "raw keys: ";
c@259 246 for (int ii = 0; ii < 2*m_BPO; ++ii) {
c@298 247 if (ii % (m_BPO/12) == 0) std::cout << "\n";
c@298 248 std::cout << m_Keys[ii] << " ";
c@298 249 }
c@298 250 std::cout << std::endl;
c@298 251
c@298 252 std::cout << "key strengths: ";
c@298 253 for (int ii = 0; ii < 24; ++ii) {
c@298 254 if (ii % 6 == 0) std::cout << "\n";
c@298 255 std::cout << m_keyStrengths[ii] << " ";
c@259 256 }
c@259 257 std::cout << std::endl;
c@248 258 */
c@259 259 double dummy;
c@259 260 // '1 +' because we number keys 1-24, not 0-23.
c@259 261 key = 1 + (int)ceil( (double)MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy )/3 );
c@234 262
c@298 263 // std::cout << "key pre-sorting: " << key << std::endl;
c@232 264
c@232 265
c@259 266 //Median filtering
c@232 267
c@259 268 // track Median buffer initial filling
c@259 269 if( m_MedianBufferFilling++ >= m_MedianWinsize)
c@259 270 m_MedianBufferFilling = m_MedianWinsize;
c@232 271
c@259 272 //shift median buffer
c@259 273 for( k = 1; k < m_MedianWinsize; k++ )
c@259 274 {
c@259 275 m_MedianFilterBuffer[ k - 1 ] = m_MedianFilterBuffer[ k ];
c@259 276 }
c@232 277
c@259 278 //write new key value into median buffer
c@259 279 m_MedianFilterBuffer[ m_MedianWinsize - 1 ] = key;
c@232 280
c@232 281
c@259 282 //Copy median into sorting buffer, reversed
c@259 283 unsigned int ijx = 0;
c@259 284 for( k = 0; k < m_MedianWinsize; k++ )
c@259 285 {
c@259 286 m_SortedBuffer[k] = m_MedianFilterBuffer[m_MedianWinsize-1-ijx];
c@259 287 ijx++;
c@259 288 }
c@232 289
c@259 290 qsort(m_SortedBuffer, m_MedianBufferFilling, sizeof(unsigned int),
c@259 291 MathUtilities::compareInt);
c@259 292 /*
c@259 293 std::cout << "sorted: ";
c@259 294 for (int ii = 0; ii < m_MedianBufferFilling; ++ii) {
c@259 295 std::cout << m_SortedBuffer[ii] << " ";
c@259 296 }
c@259 297 std::cout << std::endl;
c@259 298 */
c@259 299 int sortlength = m_MedianBufferFilling;
c@259 300 int midpoint = (int)ceil((double)sortlength/2);
c@232 301
c@298 302 // std::cout << "midpoint = " << midpoint << endl;
c@298 303
c@259 304 if( midpoint <= 0 )
c@259 305 midpoint = 1;
c@232 306
c@259 307 key = m_SortedBuffer[midpoint-1];
c@232 308
c@298 309 // std::cout << "returning key = " << key << endl;
c@298 310
c@259 311 return key;
c@232 312 }
c@232 313
c@232 314
c@268 315 bool GetKeyMode::isModeMinor( int key )
c@232 316 {
c@259 317 return (key > 12);
c@232 318 }