annotate dsp/keydetection/GetKeyMode.cpp @ 475:64fc3009d0a3

Switch from DOS to Unix line-endings
author Chris Cannam <cannam@all-day-breakfast.com>
date Thu, 30 May 2019 18:28:11 +0100
parents 0076c66d2932
children c92718cc6ef1
rev   line source
cannam@475 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@475 2 /*
cannam@475 3 QM DSP Library
cannam@475 4
cannam@475 5 Centre for Digital Music, Queen Mary, University of London.
cannam@475 6 This file 2005-2006 Christian Landone and Katy Noland.
cannam@475 7
cannam@475 8 Fixes to correct chroma offsets and for thread safety contributed
cannam@475 9 by Daniel Schürmann.
cannam@475 10
cannam@475 11 This program is free software; you can redistribute it and/or
cannam@475 12 modify it under the terms of the GNU General Public License as
cannam@475 13 published by the Free Software Foundation; either version 2 of the
cannam@475 14 License, or (at your option) any later version. See the file
cannam@475 15 COPYING included with this distribution for more information.
cannam@475 16 */
cannam@475 17
cannam@475 18 #include "GetKeyMode.h"
cannam@475 19 #include "maths/MathUtilities.h"
cannam@475 20 #include "base/Pitch.h"
cannam@475 21
cannam@475 22 #include <iostream>
cannam@475 23
cannam@475 24 #include <cstring>
cannam@475 25 #include <cstdlib>
cannam@475 26
// Resolution of the chromagram and of the key profiles below:
// 36 bins per octave, i.e. 3 bins for each of the 12 semitones.
static const int kBinsPerOctave = 36;

// Major-key chord profile. Laid out here one semitone (3 bins) per row,
// starting from the profile's reference pitch.
static double MajProfile[kBinsPerOctave] = {
    0.0384, 0.0629, 0.0258,
    0.0121, 0.0146, 0.0106,
    0.0364, 0.0610, 0.0267,
    0.0126, 0.0121, 0.0086,
    0.0364, 0.0623, 0.0279,
    0.0275, 0.0414, 0.0186,
    0.0173, 0.0248, 0.0145,
    0.0364, 0.0631, 0.0262,
    0.0129, 0.0150, 0.0098,
    0.0312, 0.0521, 0.0235,
    0.0129, 0.0142, 0.0095,
    0.0289, 0.0478, 0.0239
};

// Minor-key chord profile, same layout as MajProfile.
static double MinProfile[kBinsPerOctave] = {
    0.0375, 0.0682, 0.0299,
    0.0119, 0.0138, 0.0093,
    0.0296, 0.0543, 0.0257,
    0.0292, 0.0519, 0.0246,
    0.0159, 0.0234, 0.0135,
    0.0291, 0.0544, 0.0248,
    0.0137, 0.0176, 0.0104,
    0.0352, 0.0670, 0.0302,
    0.0222, 0.0349, 0.0164,
    0.0174, 0.0297, 0.0166,
    0.0222, 0.0401, 0.0202,
    0.0175, 0.0270, 0.0146
};
cannam@475 41 //
cannam@475 42
cannam@475 43
cannam@475 44 //////////////////////////////////////////////////////////////////////
cannam@475 45 // Construction/Destruction
cannam@475 46 //////////////////////////////////////////////////////////////////////
cannam@475 47
cannam@475 48 GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
cannam@475 49 double hpcpAverage, double medianAverage ) :
cannam@475 50 m_hpcpAverage( hpcpAverage ),
cannam@475 51 m_medianAverage( medianAverage ),
cannam@475 52 m_ChrPointer(0),
cannam@475 53 m_DecimatedBuffer(0),
cannam@475 54 m_ChromaBuffer(0),
cannam@475 55 m_MeanHPCP(0),
cannam@475 56 m_MajCorr(0),
cannam@475 57 m_MinCorr(0),
cannam@475 58 m_MedianFilterBuffer(0),
cannam@475 59 m_SortedBuffer(0),
cannam@475 60 m_keyStrengths(0)
cannam@475 61 {
cannam@475 62 m_DecimationFactor = 8;
cannam@475 63
cannam@475 64 // Chromagram configuration parameters
cannam@475 65 m_ChromaConfig.normalise = MathUtilities::NormaliseUnitMax;
cannam@475 66 m_ChromaConfig.FS = sampleRate/(double)m_DecimationFactor;
cannam@475 67 if (m_ChromaConfig.FS < 1) {
cannam@475 68 m_ChromaConfig.FS = 1;
cannam@475 69 }
cannam@475 70
cannam@475 71 // Set C3 (= MIDI #48) as our base:
cannam@475 72 // This implies that key = 1 => Cmaj, key = 12 => Bmaj, key = 13 => Cmin, etc.
cannam@475 73 m_ChromaConfig.min = Pitch::getFrequencyForPitch( 48, 0, tuningFrequency );
cannam@475 74 m_ChromaConfig.max = Pitch::getFrequencyForPitch( 96, 0, tuningFrequency );
cannam@475 75
cannam@475 76 m_ChromaConfig.BPO = kBinsPerOctave;
cannam@475 77 m_ChromaConfig.CQThresh = 0.0054;
cannam@475 78
cannam@475 79 // Chromagram inst.
cannam@475 80 m_Chroma = new Chromagram( m_ChromaConfig );
cannam@475 81
cannam@475 82 // Get calculated parameters from chroma object
cannam@475 83 m_ChromaFrameSize = m_Chroma->getFrameSize();
cannam@475 84 // override hopsize for this application
cannam@475 85 m_ChromaHopSize = m_ChromaFrameSize;
cannam@475 86
cannam@475 87 // std::cerr << "chroma frame size = " << m_ChromaFrameSize << ", decimation factor = " << m_DecimationFactor << " therefore block size = " << getBlockSize() << std::endl;
cannam@475 88
cannam@475 89 // Chromagram average and estimated key median filter lengths
cannam@475 90 m_ChromaBuffersize = (int)ceil( m_hpcpAverage * m_ChromaConfig.FS/m_ChromaFrameSize );
cannam@475 91 m_MedianWinsize = (int)ceil( m_medianAverage * m_ChromaConfig.FS/m_ChromaFrameSize );
cannam@475 92
cannam@475 93 // Reset counters
cannam@475 94 m_bufferindex = 0;
cannam@475 95 m_ChromaBufferFilling = 0;
cannam@475 96 m_MedianBufferFilling = 0;
cannam@475 97
cannam@475 98 // Spawn objectc/arrays
cannam@475 99 m_DecimatedBuffer = new double[m_ChromaFrameSize];
cannam@475 100
cannam@475 101 m_ChromaBuffer = new double[kBinsPerOctave * m_ChromaBuffersize];
cannam@475 102 memset( m_ChromaBuffer, 0, sizeof(double) * kBinsPerOctave * m_ChromaBuffersize);
cannam@475 103
cannam@475 104 m_MeanHPCP = new double[kBinsPerOctave];
cannam@475 105
cannam@475 106 m_MajCorr = new double[kBinsPerOctave];
cannam@475 107 m_MinCorr = new double[kBinsPerOctave];
cannam@475 108
cannam@475 109 m_MajProfileNorm = new double[kBinsPerOctave];
cannam@475 110 m_MinProfileNorm = new double[kBinsPerOctave];
cannam@475 111
cannam@475 112 double mMaj = MathUtilities::mean( MajProfile, kBinsPerOctave );
cannam@475 113 double mMin = MathUtilities::mean( MinProfile, kBinsPerOctave );
cannam@475 114
cannam@475 115 for( unsigned int i = 0; i < kBinsPerOctave; i++ ) {
cannam@475 116 m_MajProfileNorm[i] = MajProfile[i] - mMaj;
cannam@475 117 m_MinProfileNorm[i] = MinProfile[i] - mMin;
cannam@475 118 }
cannam@475 119
cannam@475 120 m_MedianFilterBuffer = new int[ m_MedianWinsize ];
cannam@475 121 memset( m_MedianFilterBuffer, 0, sizeof(int)*m_MedianWinsize);
cannam@475 122
cannam@475 123 m_SortedBuffer = new int[ m_MedianWinsize ];
cannam@475 124 memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize);
cannam@475 125
cannam@475 126 m_Decimator = new Decimator( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor );
cannam@475 127
cannam@475 128 m_keyStrengths = new double[24];
cannam@475 129 }
cannam@475 130
cannam@475 131 GetKeyMode::~GetKeyMode()
cannam@475 132 {
cannam@475 133 delete m_Chroma;
cannam@475 134 delete m_Decimator;
cannam@475 135
cannam@475 136 delete [] m_DecimatedBuffer;
cannam@475 137 delete [] m_ChromaBuffer;
cannam@475 138 delete [] m_MeanHPCP;
cannam@475 139 delete [] m_MajCorr;
cannam@475 140 delete [] m_MinCorr;
cannam@475 141 delete [] m_MajProfileNorm;
cannam@475 142 delete [] m_MinProfileNorm;
cannam@475 143 delete [] m_MedianFilterBuffer;
cannam@475 144 delete [] m_SortedBuffer;
cannam@475 145 delete [] m_keyStrengths;
cannam@475 146 }
cannam@475 147
cannam@475 148 double GetKeyMode::krumCorr( const double *pDataNorm, const double *pProfileNorm,
cannam@475 149 int shiftProfile, unsigned int length)
cannam@475 150 {
cannam@475 151 double retVal= 0.0;
cannam@475 152
cannam@475 153 double num = 0;
cannam@475 154 double den = 0;
cannam@475 155 double sum1 = 0;
cannam@475 156 double sum2 = 0;
cannam@475 157
cannam@475 158 for( unsigned int i = 0; i <length; i++ )
cannam@475 159 {
cannam@475 160 int k = (i - shiftProfile + length) % length;
cannam@475 161
cannam@475 162 num += pDataNorm[i] * pProfileNorm[k];
cannam@475 163
cannam@475 164 sum1 += ( pDataNorm[i] * pDataNorm[i] );
cannam@475 165 sum2 += ( pProfileNorm[k] * pProfileNorm[k] );
cannam@475 166 }
cannam@475 167
cannam@475 168 den = sqrt(sum1 * sum2);
cannam@475 169
cannam@475 170 if( den>0 ) {
cannam@475 171 retVal = num/den;
cannam@475 172 } else {
cannam@475 173 retVal = 0;
cannam@475 174 }
cannam@475 175
cannam@475 176 return retVal;
cannam@475 177 }
cannam@475 178
cannam@475 179 int GetKeyMode::process(double *PCMData)
cannam@475 180 {
cannam@475 181 int key;
cannam@475 182 unsigned int j,k;
cannam@475 183
cannam@475 184 //////////////////////////////////////////////
cannam@475 185 m_Decimator->process( PCMData, m_DecimatedBuffer);
cannam@475 186
cannam@475 187 m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );
cannam@475 188
cannam@475 189 /*
cannam@475 190 std::cout << "raw chroma: ";
cannam@475 191 for (int ii = 0; ii < kBinsPerOctave; ++ii) {
cannam@475 192 if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
cannam@475 193 std::cout << m_ChrPointer[ii] << " ";
cannam@475 194 }
cannam@475 195 std::cout << std::endl;
cannam@475 196 */
cannam@475 197 // populate hpcp values;
cannam@475 198 int cbidx;
cannam@475 199 for( j = 0; j < kBinsPerOctave; j++ ) {
cannam@475 200 cbidx = (m_bufferindex * kBinsPerOctave) + j;
cannam@475 201 m_ChromaBuffer[ cbidx ] = m_ChrPointer[j];
cannam@475 202 }
cannam@475 203
cannam@475 204 //keep track of input buffers;
cannam@475 205 if( m_bufferindex++ >= m_ChromaBuffersize - 1) {
cannam@475 206 m_bufferindex = 0;
cannam@475 207 }
cannam@475 208
cannam@475 209 // track filling of chroma matrix
cannam@475 210 if( m_ChromaBufferFilling++ >= m_ChromaBuffersize) {
cannam@475 211 m_ChromaBufferFilling = m_ChromaBuffersize;
cannam@475 212 }
cannam@475 213
cannam@475 214 //calculate mean
cannam@475 215 for( k = 0; k < kBinsPerOctave; k++ ) {
cannam@475 216 double mnVal = 0.0;
cannam@475 217 for( j = 0; j < m_ChromaBufferFilling; j++ ) {
cannam@475 218 mnVal += m_ChromaBuffer[ k + (j*kBinsPerOctave) ];
cannam@475 219 }
cannam@475 220
cannam@475 221 m_MeanHPCP[k] = mnVal/(double)m_ChromaBufferFilling;
cannam@475 222 }
cannam@475 223
cannam@475 224 // Normalize for zero average
cannam@475 225 double mHPCP = MathUtilities::mean( m_MeanHPCP, kBinsPerOctave );
cannam@475 226 for( k = 0; k < kBinsPerOctave; k++ )
cannam@475 227 {
cannam@475 228 m_MeanHPCP[k] -= mHPCP;
cannam@475 229 }
cannam@475 230
cannam@475 231
cannam@475 232 for( k = 0; k < kBinsPerOctave; k++ )
cannam@475 233 {
cannam@475 234 // The Cromagram has the center of C at bin 0, while the major
cannam@475 235 // and minor profiles have the center of C at 1. We want to have
cannam@475 236 // the correlation for C result also at 1.
cannam@475 237 // To achieve this we have to shift two times:
cannam@475 238 m_MajCorr[k] = krumCorr( m_MeanHPCP, m_MajProfileNorm, (int)k - 2, kBinsPerOctave );
cannam@475 239 m_MinCorr[k] = krumCorr( m_MeanHPCP, m_MinProfileNorm, (int)k - 2, kBinsPerOctave );
cannam@475 240 }
cannam@475 241
cannam@475 242 /*
cannam@475 243 std::cout << "raw keys: ";
cannam@475 244 for (int ii = 0; ii < kBinsPerOctave; ++ii) {
cannam@475 245 if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
cannam@475 246 std::cout << m_MajCorr[ii] << " ";
cannam@475 247 }
cannam@475 248 for (int ii = 0; ii < kBinsPerOctave; ++ii) {
cannam@475 249 if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
cannam@475 250 std::cout << m_MinCorr[ii] << " ";
cannam@475 251 }
cannam@475 252 std::cout << std::endl;
cannam@475 253 */
cannam@475 254
cannam@475 255 // m_MajCorr[1] is C center 1 / 3 + 1 = 1
cannam@475 256 // m_MajCorr[4] is D center 4 / 3 + 1 = 2
cannam@475 257 // '+ 1' because we number keys 1-24, not 0-23.
cannam@475 258 double maxMaj;
cannam@475 259 int maxMajBin = MathUtilities::getMax( m_MajCorr, kBinsPerOctave, &maxMaj );
cannam@475 260 double maxMin;
cannam@475 261 int maxMinBin = MathUtilities::getMax( m_MinCorr, kBinsPerOctave, &maxMin );
cannam@475 262 int maxBin = (maxMaj > maxMin) ? maxMajBin : (maxMinBin + kBinsPerOctave);
cannam@475 263 key = maxBin / 3 + 1;
cannam@475 264
cannam@475 265 // std::cout << "fractional key pre-sorting: " << (maxBin + 2) / 3.0 << std::endl;
cannam@475 266 // std::cout << "key pre-sorting: " << key << std::endl;
cannam@475 267
cannam@475 268
cannam@475 269 //Median filtering
cannam@475 270
cannam@475 271 // track Median buffer initial filling
cannam@475 272 if( m_MedianBufferFilling++ >= m_MedianWinsize) {
cannam@475 273 m_MedianBufferFilling = m_MedianWinsize;
cannam@475 274 }
cannam@475 275
cannam@475 276 //shift median buffer
cannam@475 277 for( k = 1; k < m_MedianWinsize; k++ ) {
cannam@475 278 m_MedianFilterBuffer[ k - 1 ] = m_MedianFilterBuffer[ k ];
cannam@475 279 }
cannam@475 280
cannam@475 281 //write new key value into median buffer
cannam@475 282 m_MedianFilterBuffer[ m_MedianWinsize - 1 ] = key;
cannam@475 283
cannam@475 284
cannam@475 285 //Copy median into sorting buffer, reversed
cannam@475 286 unsigned int ijx = 0;
cannam@475 287 for( k = 0; k < m_MedianWinsize; k++ ) {
cannam@475 288 m_SortedBuffer[k] = m_MedianFilterBuffer[m_MedianWinsize-1-ijx];
cannam@475 289 ijx++;
cannam@475 290 }
cannam@475 291
cannam@475 292 qsort(m_SortedBuffer, m_MedianBufferFilling, sizeof(unsigned int),
cannam@475 293 MathUtilities::compareInt);
cannam@475 294 /*
cannam@475 295 std::cout << "sorted: ";
cannam@475 296 for (int ii = 0; ii < m_MedianBufferFilling; ++ii) {
cannam@475 297 std::cout << m_SortedBuffer[ii] << " ";
cannam@475 298 }
cannam@475 299 std::cout << std::endl;
cannam@475 300 */
cannam@475 301 int sortlength = m_MedianBufferFilling;
cannam@475 302 int midpoint = (int)ceil((double)sortlength/2);
cannam@475 303
cannam@475 304 // std::cout << "midpoint = " << midpoint << endl;
cannam@475 305
cannam@475 306 if( midpoint <= 0 ) {
cannam@475 307 midpoint = 1;
cannam@475 308 }
cannam@475 309
cannam@475 310 key = m_SortedBuffer[midpoint-1];
cannam@475 311
cannam@475 312 // std::cout << "returning key = " << key << endl;
cannam@475 313
cannam@475 314 return key;
cannam@475 315 }
cannam@475 316
cannam@475 317
cannam@475 318 bool GetKeyMode::isModeMinor( int key )
cannam@475 319 {
cannam@475 320 return (key > 12);
cannam@475 321 }
cannam@475 322
cannam@475 323 unsigned int getChromaSize()
cannam@475 324 {
cannam@475 325 return kBinsPerOctave;
cannam@475 326 }
cannam@475 327
cannam@475 328 double* GetKeyMode::getKeyStrengths() {
cannam@475 329 unsigned int k;
cannam@475 330
cannam@475 331 for (k = 0; k < 24; ++k) {
cannam@475 332 m_keyStrengths[k] = 0;
cannam@475 333 }
cannam@475 334
cannam@475 335 for( k = 0; k < kBinsPerOctave; k++ )
cannam@475 336 {
cannam@475 337 int idx = k / (kBinsPerOctave/12);
cannam@475 338 int rem = k % (kBinsPerOctave/12);
cannam@475 339 if (rem == 0 || m_MajCorr[k] > m_keyStrengths[idx]) {
cannam@475 340 m_keyStrengths[idx] = m_MajCorr[k];
cannam@475 341 }
cannam@475 342 }
cannam@475 343
cannam@475 344 for( k = 0; k < kBinsPerOctave; k++ )
cannam@475 345 {
cannam@475 346 int idx = (k + kBinsPerOctave) / (kBinsPerOctave/12);
cannam@475 347 int rem = k % (kBinsPerOctave/12);
cannam@475 348 if (rem == 0 || m_MinCorr[k] > m_keyStrengths[idx]) {
cannam@475 349 m_keyStrengths[idx] = m_MinCorr[k];
cannam@475 350 }
cannam@475 351 }
cannam@475 352
cannam@475 353 /*
cannam@475 354 std::cout << "key strengths: ";
cannam@475 355 for (int ii = 0; ii < 24; ++ii) {
cannam@475 356 if (ii % 6 == 0) std::cout << "\n";
cannam@475 357 std::cout << m_keyStrengths[ii] << " ";
cannam@475 358 }
cannam@475 359 std::cout << std::endl;
cannam@475 360 */
cannam@475 361
cannam@475 362 return m_keyStrengths;
cannam@475 363 }