Mercurial > hg > multitrack-audio-matcher
comparison src/AudioEventMatcher.cpp @ 37:9806a4f22fd0
Fixed bugs in the likelihoods that caused some to be zero when no events found.
author | Andrew N Robertson <andrew.robertson@eecs.qmul.ac.uk> |
---|---|
date | Tue, 24 Apr 2012 14:16:01 +0100 |
parents | eb43b2a007ea |
children | f5de07b4d733 |
comparison
equal
deleted
inserted
replaced
36:eb43b2a007ea | 37:9806a4f22fd0 |
---|---|
14 const float pitchCutOff = 16;//within which pitches are even considered | 14 const float pitchCutOff = 16;//within which pitches are even considered |
15 | 15 |
16 AudioEventMatcher::AudioEventMatcher(){ | 16 AudioEventMatcher::AudioEventMatcher(){ |
17 | 17 |
18 useChromaDotProduct = false; | 18 useChromaDotProduct = false; |
19 | |
20 printingData = false; | |
19 | 21 |
20 pitchLikelihoodToNoise = 0.6;//more noise | 22 pitchLikelihoodToNoise = 0.6;//more noise |
21 chromaLikelihoodToNoise = 0.5;//lower => more noise, higher more weight for events | 23 chromaLikelihoodToNoise = 0.5;//lower => more noise, higher more weight for events |
22 chromaLikelihoodWidth = 50;//ms round onset event | 24 chromaLikelihoodWidth = 50;//ms round onset event |
23 | 25 |
144 temporal.tempoPosterior.addGaussianShapeFromRealTime(recordedTempo, 2000, 1); | 146 temporal.tempoPosterior.addGaussianShapeFromRealTime(recordedTempo, 2000, 1); |
145 | 147 |
146 //SET TEMPO PRIOR for Speed Ratio | 148 //SET TEMPO PRIOR for Speed Ratio |
147 //the update this | 149 //the update this |
148 setSpeedRatioDistribution(currentSpeedRatio); | 150 setSpeedRatioDistribution(currentSpeedRatio); |
151 | |
152 euclideanMaximumDistance = 0; | |
153 | |
149 //bayesianStruct.posterior.printArray(); | 154 //bayesianStruct.posterior.printArray(); |
150 } | 155 } |
151 | 156 |
152 | 157 |
153 void AudioEventMatcher::setSpeedRatioDistribution(const double& speedRatio){ | 158 void AudioEventMatcher::setSpeedRatioDistribution(const double& speedRatio){ |
156 bayesianStruct.relativeSpeedPosterior.addGaussianShapeFromRealTime(1, 0.06, 0.8); | 161 bayesianStruct.relativeSpeedPosterior.addGaussianShapeFromRealTime(1, 0.06, 0.8); |
157 } | 162 } |
158 | 163 |
159 void AudioEventMatcher::stopPlaying(){ | 164 void AudioEventMatcher::stopPlaying(){ |
160 startedPlaying = false; | 165 startedPlaying = false; |
161 temporal.printEventTimes(); | 166 //temporal.printEventTimes(); |
162 } | 167 } |
163 | 168 |
164 void AudioEventMatcher::rescue(){ | 169 void AudioEventMatcher::rescue(){ |
165 bayesianStruct.posterior.zero(); | 170 bayesianStruct.posterior.zero(); |
166 bayesianStruct.posterior.addConstant(1); | 171 bayesianStruct.posterior.addConstant(1); |
184 temporal.tempoPosterior.addGaussianShape(temporal.tempoPosterior.MAPestimate, temporal.tempoArraySize / 4, 0.5 ); | 189 temporal.tempoPosterior.addGaussianShape(temporal.tempoPosterior.MAPestimate, temporal.tempoArraySize / 4, 0.5 ); |
185 } | 190 } |
186 | 191 |
187 void AudioEventMatcher::updateRecordedTempo(){ | 192 void AudioEventMatcher::updateRecordedTempo(){ |
188 //tempo of equivalent recorded position is updated | 193 //tempo of equivalent recorded position is updated |
194 if (recordedTempoIndex < recordedTempoData.globalTempoTimes.size()){//if for debug | |
189 while(currentAlignmentPosition > recordedTempoData.globalTempoTimes[recordedTempoIndex]){ | 195 while(currentAlignmentPosition > recordedTempoData.globalTempoTimes[recordedTempoIndex]){ |
190 recordedTempoIndex++; | 196 recordedTempoIndex++; |
191 } | 197 } |
192 recordedTempo = recordedTempoData.globalTempo[recordedTempoIndex]; | 198 recordedTempo = recordedTempoData.globalTempo[recordedTempoIndex]; |
193 double tmpRatio = currentSpeedRatio; | 199 double tmpRatio = currentSpeedRatio; |
194 currentSpeedRatio = temporal.playingTempo / recordedTempo; | 200 currentSpeedRatio = temporal.playingTempo / recordedTempo; |
195 if (currentSpeedRatio != tmpRatio) | 201 if (currentSpeedRatio != tmpRatio) |
196 setSpeedRatioDistribution(currentSpeedRatio); | 202 setSpeedRatioDistribution(currentSpeedRatio); |
203 | |
204 }//end if to prevent debug crash | |
197 } | 205 } |
198 | 206 |
199 void AudioEventMatcher::updateBestAlignmentPosition(){ | 207 void AudioEventMatcher::updateBestAlignmentPosition(){ |
200 //THIS DEALS WITH WHERE WE ARE NOW! ON THE SCREEN | 208 //THIS DEALS WITH WHERE WE ARE NOW! ON THE SCREEN |
201 //DIFFERENT TO WHEN EVENTS COME IN AS THEY ARE TIMESTAMPED - SO EG A PITCH EVENT MAY ARRIVE 16 CHROMA FRAMES LATER - BIG DIFFERENCE | 209 //DIFFERENT TO WHEN EVENTS COME IN AS THEY ARE TIMESTAMPED - SO EG A PITCH EVENT MAY ARRIVE 16 CHROMA FRAMES LATER - BIG DIFFERENCE |
341 bayesianStruct.prior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow); | 349 bayesianStruct.prior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow); |
342 | 350 |
343 ofSetColor(255,0,0);//projected prior in red | 351 ofSetColor(255,0,0);//projected prior in red |
344 projectedPrior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow); | 352 projectedPrior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow); |
345 | 353 |
346 | 354 //draw pitch |
347 | 355 ofSetColor(0,100,255); |
356 int index = getScreenWidthIndexOfEventTime(recentPitchEventTime); | |
357 //this window would be used (recordedTracks.loadedAudioFiles[1].fileLoader.onsetDetect.window); | |
358 | |
359 | |
360 } | |
361 | |
362 int AudioEventMatcher::getScreenWidthIndexOfEventTime(const double& time){ | |
363 return (time - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis; | |
348 } | 364 } |
349 | 365 |
350 void AudioEventMatcher::drawTrackLikelihoods(){ | 366 void AudioEventMatcher::drawTrackLikelihoods(){ |
351 //draw track by track likelihoods | 367 //draw track by track likelihoods |
352 for (int i = 0; i <recordedTracks.numberOfAudioTracks;i++){ | 368 for (int i = 0; i <recordedTracks.numberOfAudioTracks;i++){ |
374 | 390 |
375 ofSetColor(255); | 391 ofSetColor(255); |
376 tmpStr = "pitch "+ofToString(recentPitch, 2); | 392 tmpStr = "pitch "+ofToString(recentPitch, 2); |
377 tmpStr += " Nearest "+ofToString(pitchOfNearestMatch,2); | 393 tmpStr += " Nearest "+ofToString(pitchOfNearestMatch,2); |
378 tmpStr += " dist "+ofToString(distanceOfNearestMatch, 2); | 394 tmpStr += " dist "+ofToString(distanceOfNearestMatch, 2); |
379 tmpStr += ", Time "+ofToString(recentTime, 0); | 395 tmpStr += ", Time "+ofToString(recentPitchEventTime, 0); |
380 ofDrawBitmapString(tmpStr, 20, 20); | 396 ofDrawBitmapString(tmpStr, 20, 20); |
381 | 397 |
382 string alignString = " align "+ofToString(currentAlignmentPosition, 2); | 398 string alignString = " align "+ofToString(currentAlignmentPosition, 2); |
383 alignString += " playing "+ofToString(synchroniser.playingPositionRatio, 5); | 399 alignString += " playing "+ofToString(synchroniser.playingPositionRatio, 5); |
384 alignString += " pos "+ofToString(synchroniser.playingPositionMillis,0)+" ms"; | 400 alignString += " pos "+ofToString(synchroniser.playingPositionMillis,0)+" ms"; |
398 matchNewPitchEvent(channel, pitchIn, timeIn);//main pitch matching fn | 414 matchNewPitchEvent(channel, pitchIn, timeIn);//main pitch matching fn |
399 | 415 |
400 likelihoodVisualisation[1] = bayesianStruct.likelihood; | 416 likelihoodVisualisation[1] = bayesianStruct.likelihood; |
401 | 417 |
402 recentPitch = pitchIn;//for drawing | 418 recentPitch = pitchIn;//for drawing |
403 recentTime = timeIn; | 419 recentPitchEventTime = timeIn; |
404 } | 420 } |
405 } | 421 } |
406 | 422 |
407 | 423 |
408 void AudioEventMatcher::newChromaEvent(const int& channel, float* chromaIn, const double& timeIn){ | 424 void AudioEventMatcher::newChromaEvent(const int& channel, float* chromaIn, const double& timeIn){ |
409 | 425 |
410 // could add event to the liveInput list? as in pitch event | 426 // could add event to the liveInput list? as in pitch event |
411 printf("match chroma channel %i\n", channel); | 427 if (printingData){ |
412 for (int i = 0;i < 12;i++){ | 428 printf("match chroma channel %i\n", channel); |
429 for (int i = 0;i < 12;i++){ | |
413 printf("chroma in[%i] = %f\n", i, chromaIn[i]); | 430 printf("chroma in[%i] = %f\n", i, chromaIn[i]); |
431 } | |
414 } | 432 } |
415 | 433 |
416 matchNewChromaEvent(channel, chromaIn, timeIn);//main pitch matching fn | 434 matchNewChromaEvent(channel, chromaIn, timeIn);//main pitch matching fn |
417 | 435 |
418 likelihoodVisualisation[channel] = bayesianStruct.likelihood; | 436 likelihoodVisualisation[channel] = bayesianStruct.likelihood; |
542 } | 560 } |
543 } | 561 } |
544 | 562 |
545 | 563 |
546 | 564 |
547 if (numberOfMatches > 0){//no point updating unless there is a match | 565 if (numberOfMatches > 0 && totalLikelihoodAdded > 0){//no point updating unless there is a match |
548 //replacing numberOfMatches with totalLike below... | 566 //replacing numberOfMatches with totalLike below... |
549 bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-pitchLikelihoodToNoise)/(pitchLikelihoodToNoise*bayesianStruct.likelihood.length)); | 567 //bug here was that if totaladded = 0, we add then zero likelihood |
568 bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-pitchLikelihoodToNoise)/(bayesianStruct.likelihood.length)); | |
569 // bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-pitchLikelihoodToNoise)/(pitchLikelihoodToNoise*bayesianStruct.likelihood.length)); | |
550 | 570 |
551 //tmp set likelihood constant and calculate using that | 571 //tmp set likelihood constant and calculate using that |
552 //bayesianStruct.likelihood.zero(); | 572 //bayesianStruct.likelihood.zero(); |
553 //bayesianStruct.likelihood.addConstant(1); | 573 //bayesianStruct.likelihood.addConstant(1); |
554 | 574 |
620 double startMatchingTime = bayesianStruct.likelihood.offset; | 640 double startMatchingTime = bayesianStruct.likelihood.offset; |
621 double endMatchingTime = bayesianStruct.likelihood.offset + matchWindowWidth; | 641 double endMatchingTime = bayesianStruct.likelihood.offset + matchWindowWidth; |
622 double millisTime = -1*INFINITY;//or 0 is fine | 642 double millisTime = -1*INFINITY;//or 0 is fine |
623 | 643 |
624 int checkIndex = 0; | 644 int checkIndex = 0; |
625 if (channel <= recordedTracks.numberOfAudioTracks){ | 645 if (channel <= recordedTracks.numberOfAudioTracks && checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()){ |
646 | |
626 while (millisTime < startMatchingTime) { | 647 while (millisTime < startMatchingTime) { |
627 millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[checkIndex].millisTime; | 648 millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[checkIndex].millisTime; |
628 checkIndex++; | 649 checkIndex++; |
629 }//go up to where we need to check from fast | 650 }//go up to where we need to check from fast |
630 | 651 |
642 bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, chromaLikelihoodWidth, quantity); | 663 bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, chromaLikelihoodWidth, quantity); |
643 | 664 |
644 // bayesianStruct.likelihood.addGaussianShapeFromRealTime(millisTime, onsetLikelihoodWidth, quantity); | 665 // bayesianStruct.likelihood.addGaussianShapeFromRealTime(millisTime, onsetLikelihoodWidth, quantity); |
645 numberOfMatches++; | 666 numberOfMatches++; |
646 totalLikelihoodAdded += quantity; | 667 totalLikelihoodAdded += quantity; |
647 printf("Adding CHROMA Gaussian for onset at time %.1f dist %.3f\n", millisTime, quantity); | 668 |
669 //printf("Adding CHROMA Gaussian for onset at time %.1f dist %.3f\n", millisTime, quantity); | |
648 | 670 |
649 }//end if within limits (changed so it now is 4 sure) | 671 }//end if within limits (changed so it now is 4 sure) |
650 } | 672 } |
651 } | 673 } |
652 | 674 |
653 | 675 |
654 if (numberOfMatches > 0){//no point updating unless there is a match | 676 if (numberOfMatches > 0 && totalLikelihoodAdded > 0){//no point updating unless there is a match |
655 //replacing numberOfMatches with totalLike below... | 677 //replacing numberOfMatches with totalLike below... |
656 | 678 |
657 printf("CHROMA HAS %i MATCHES\n", numberOfMatches); | 679 printf("CHROMA HAS %i MATCHES\n", numberOfMatches); |
658 | 680 |
659 bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-chromaLikelihoodToNoise)/(chromaLikelihoodToNoise*bayesianStruct.likelihood.length)); | 681 bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-chromaLikelihoodToNoise)/(bayesianStruct.likelihood.length)); |
682 //previous way | |
683 // bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-chromaLikelihoodToNoise)/(chromaLikelihoodToNoise*bayesianStruct.likelihood.length)); | |
660 | 684 |
661 bayesianStruct.calculatePosterior(); | 685 bayesianStruct.calculatePosterior(); |
662 lastAlignmentTime = timeIn;//has to use the STAMPED time | 686 lastAlignmentTime = timeIn;//has to use the STAMPED time |
663 recentEventTime[channel] = timeIn; | 687 recentEventTime[channel] = timeIn; |
664 | 688 |
686 } | 710 } |
687 | 711 |
688 double AudioEventMatcher::getChromaEuclideanDistance(float* chromaOne, float* chromaTwo){ | 712 double AudioEventMatcher::getChromaEuclideanDistance(float* chromaOne, float* chromaTwo){ |
689 double distance = 0; | 713 double distance = 0; |
690 double total = 0; | 714 double total = 0; |
715 | |
691 // printf("\n"); | 716 // printf("\n"); |
692 for (int i = 0;i < 12;i++){ | 717 for (int i = 0;i < 12;i++){ |
693 total += (chromaOne[i] - chromaTwo[i])*(chromaOne[i] - chromaTwo[i]); | 718 total += (chromaOne[i] - chromaTwo[i])*(chromaOne[i] - chromaTwo[i]); |
694 // printf("chroma1: %.2f; chroma2: %.2f\n", chromaOne[i], chromaTwo[i]); | 719 // printf("chroma1: %.2f; chroma2: %.2f\n", chromaOne[i], chromaTwo[i]); |
695 // total += chromaOne[i]*chromaOne[i] + (chromaTwo[i]*chromaTwo[i]); | 720 // total += chromaOne[i]*chromaOne[i] + (chromaTwo[i]*chromaTwo[i]); |
696 } | 721 } |
697 | 722 |
698 if (total > 0) | 723 if (total > euclideanMaximumDistance) |
699 distance = 1.0/sqrt(total); | 724 euclideanMaximumDistance = total; |
725 | |
726 distance = ((euclideanMaximumDistance - total)/ euclideanMaximumDistance);//i.e. 1 is | |
727 | |
728 // if (total > 0) | |
729 | |
730 | |
731 // distance = 1.0/sqrt(total); | |
700 // printf("DISTANCE : %.3f\n", distance); | 732 // printf("DISTANCE : %.3f\n", distance); |
701 return distance; | 733 return distance; |
702 } | 734 } |
703 | 735 |
704 void AudioEventMatcher::windowResized(const int& w, const int& h){ | 736 void AudioEventMatcher::windowResized(const int& w, const int& h){ |