Mercurial > hg > multitrack-audio-matcher
view src/AudioEventMatcher.cpp @ 8:572564b7cb85
added calculation posterior into both onset and pitch processes
author | Andrew N Robertson <andrew.robertson@eecs.qmul.ac.uk> |
---|---|
date | Fri, 03 Feb 2012 13:28:59 +0000 |
parents | 33dedfe32893 |
children | bc62266af280 |
line wrap: on
line source
/*
 *  AudioEventMatcher.cpp
 *  MultipleAudioMathcher
 *
 *  Created by Andrew on 31/01/2012.
 *  Copyright 2012 QMUL. All rights reserved.
 *
 */

#include "AudioEventMatcher.h"

#include <cmath>

// Width passed to bayesianStruct.resetSize() and used as the span
// [likelihood.offset, likelihood.offset + matchWindowWidth] when selecting
// recorded onsets in matchNewOnsetEvent().
const int matchWindowWidth = 6000;

// Builds the matcher: sizes the Bayesian arrays and zeroes the state that is
// read by draw() / updateBestAlignmentPosition() before any event has arrived.
AudioEventMatcher::AudioEventMatcher(){

	setArraySizes();

	usingRealTime = false;
	bayesianStruct.realTimeMode = &usingRealTime;

	recentPitch = 0;
	recentTime = 0;//drawn by draw() - was previously left uninitialised
	currentAlignmentPosition = 0;
	lastAlignmentTime = 0;//read by updateBestAlignmentPosition()
	startTime = 0;//properly set in startPlaying()
}

// Stacks the three Bayesian display windows (position, likelihood, tempo)
// in equal thirds of the relative height left below the audio tracks.
void AudioEventMatcher::setWindowDimensions(){
	double startHeight = recordedTracks.numberOfAudioTracks * recordedTracks.trackScreenHeight;
	double heightAvailable = 1 - startHeight;
	heightAvailable /= 3.0;

	bayesPositionWindow.setToRelativeSize(0, startHeight, 1, heightAvailable);
	bayesLikelihoodWindow.setToRelativeSize(0, startHeight + 1*heightAvailable, 1, heightAvailable);
	bayesTempoWindow.setToRelativeSize(0, startHeight + 2*heightAvailable, 1, heightAvailable);
}

// Configures the sizes and scalars of the speed and position distributions.
void AudioEventMatcher::setArraySizes(){
	bayesianStruct.resetSpeedSize(200);
	bayesianStruct.setRelativeSpeedScalar(0.01);
	bayesianStruct.setSpeedPrior(1.0);
	// Return value is unused here; kept in case the call updates state
	// inside relativeSpeedPrior - TODO confirm against DynamicVector.
	bayesianStruct.relativeSpeedPrior.getMaximum();

	bayesianStruct.resetSize(matchWindowWidth);
	bayesianStruct.setPositionDistributionScalar(1);
}

// Resets the alignment state and records the wall-clock start of playback.
void AudioEventMatcher::startPlaying(){
	bayesianStruct.setStartPlaying();
	currentAlignmentPosition = 0;
	startTime = ofGetElapsedTimeMillis();
	//bayesianStruct.posterior.printArray();
}

// Sets currentAlignmentPosition to the posterior's MAP position (in real
// terms, plus the posterior offset), then advances it by the time elapsed
// since the last alignment multiplied by the MAP relative speed.
void AudioEventMatcher::updateBestAlignmentPosition(){
	currentAlignmentPosition = bayesianStruct.posterior.offset
		+ bayesianStruct.posterior.getIndexInRealTerms(bayesianStruct.posterior.MAPestimate);

	currentAlignmentPosition += (ofGetElapsedTimeMillis() - lastAlignmentTime)
		* bayesianStruct.relativeSpeedPosterior.getIndexInRealTerms(bayesianStruct.relativeSpeedPosterior.MAPestimate);
}

// Draws the window outlines, the recorded tracks, the Bayesian distributions
// and a text readout of the most recent pitch event.
void AudioEventMatcher::draw(){
	//draw some outlines in blue
	ofSetColor(20,200,200);
	bayesPositionWindow.drawOutline();
	bayesTempoWindow.drawOutline();

	//draw the scrolling audio tracks
	recordedTracks.drawTracks();

	ofSetColor(255);
//	bayesianStruct.relativeSpeedPrior.drawVector(0, 200, bayesTempoWindow);

	drawBayesianDistributions();

//	bayesianStruct.posterior.drawVector(0, bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);

	//bayesianStruct.posterior.drawVector(bayesianStruct.posterior.getRealTermsAsIndex(0), bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);

//	bayesianStruct.relativeSpeedPosterior.drawVector(0, bayesianStruct.relativeSpeedPosterior.getRealTermsAsIndex(2), bayesTempoWindow);

	ofDrawBitmapString("pitch "+ofToString(recentPitch, 2)+", Time "+ofToString(recentTime, 0), 20, 20);
}

// Draws the posterior, relative-speed posterior, per-track likelihoods, the
// prior saved at the last pitch event and the current prior, all constrained
// to the time span currently shown by the first loaded audio track.
// NOTE(review): indexes loadedAudioFiles[0] unconditionally and divides by
// screenWidthMillis - presumably at least one track is always loaded; confirm.
void AudioEventMatcher::drawBayesianDistributions(){

	//visible time span, taken from the first track's onset display
	double screenWidthMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.amplitudeNumber);
	double screenStartTimeMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.drawParams.windowStartFrame);
	double screenEndTimeMillis = screenStartTimeMillis + screenWidthMillis;
	int startIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenStartTimeMillis);
	int endIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenEndTimeMillis);

	bayesianStruct.posterior.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesPositionWindow);

	string tmpString = "start "+ofToString(screenStartTimeMillis)+" (index "+ofToString(startIndex)+"), end "+ofToString(screenEndTimeMillis);
	ofDrawBitmapString(tmpString, bayesPositionWindow.x+20, bayesPositionWindow.y+20);

//	bayesianStruct.likelihood.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesLikelihoodWindow);

	bayesianStruct.relativeSpeedPosterior.drawConstrainedVector(0, bayesianStruct.relativeSpeedPosterior.arraySize, 0, ofGetWidth(), bayesTempoWindow);

	//debug text
	string tmpStr = "zero is "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(0));
	tmpStr += " offsetis "+ofToString(bayesianStruct.posterior.offset);
	tmpStr += " screenWidth = "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis));
	ofDrawBitmapString(tmpStr, 20,140);
	tmpStr = "best est "+ofToString(bayesianStruct.bestEstimate);
	ofDrawBitmapString(tmpStr, 20, 180);

	ofDrawBitmapString("screenwidth "+ofToString(screenWidthMillis), 20, 800);

	//green vertical line at the current alignment estimate
	ofSetColor(0,255,0);
	double currentEstimateIndex = (currentAlignmentPosition - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
	ofLine(currentEstimateIndex, bayesPositionWindow.y, currentEstimateIndex, bayesPositionWindow.y + bayesPositionWindow.height);

	//draw track by track likelihoods
	for (int i = 0; i < recordedTracks.numberOfAudioTracks; i++){
		ofSetColor(200,255,50);
		likelihoodVisualisation[i].drawConstrainedVector(likelihoodVisualisation[i].getRealTermsAsIndex(screenStartTimeMillis), likelihoodVisualisation[i].getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window);
		ofSetColor(255);
		ofDrawBitmapString("recent event "+ofToString(recentEventTime[i]), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.x + 20, recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.y + recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.height - 10);
	}

	//prior as it was when the last pitch event was matched
	int priorStartIndex = recentPrior.getRealTermsAsIndex(screenStartTimeMillis);
	int priorEndIndex = recentPrior.getRealTermsAsIndex(screenEndTimeMillis);
	ofSetColor(0,200,200);
	recentPrior.drawConstrainedVector(priorStartIndex, priorEndIndex, 0, ofGetWidth(), bayesPositionWindow);

//	bayesianStruct.prior.addTriangularShape(100, 20, 0.4);

	ofSetColor(255,0,100);
	bayesianStruct.prior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesLikelihoodWindow);
}

// Handles a live pitch event. Non-positive pitches are ignored; otherwise the
// event is stored in liveInput, matched against the recorded track, and the
// resulting likelihood and pitch/time are kept for drawing.
void AudioEventMatcher::newPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
	if (pitchIn > 0){
		liveInput.addPitchEvent(pitchIn, timeIn);

		//tmp print stuff
		// NOTE(review): "%i" assumes MAPestimate is an int - confirm against its declaration.
		printf("New pitch MAP post estimate now %i, ", bayesianStruct.posterior.MAPestimate);
		double tmp = bayesianStruct.posterior.getMAPestimate();
		printf(" getting it %f and offset %f == %f ms\n", tmp, bayesianStruct.posterior.offset, bayesianStruct.posterior.getIndexInRealTerms(tmp));

		matchNewPitchEvent(channel, pitchIn, timeIn);//main pitch matching fn

		// NOTE(review): the visualisation slot is fixed at 1 regardless of channel.
		likelihoodVisualisation[1] = bayesianStruct.likelihood;

		recentPitch = pitchIn;//for drawing
		recentTime = timeIn;
	}
}

// Kick onset on the default channel 0.
void AudioEventMatcher::newKickEvent(const double& timeIn){
//	liveInput.addKickEvent(timeIn);
	matchNewOnsetEvent(0, timeIn);
	likelihoodVisualisation[0] = bayesianStruct.likelihood;
}

// Kick onset on an explicit channel.
// NOTE(review): the likelihood is still stored in slot 0 rather than
// [channel] - presumably callers always pass 0 here; confirm.
void AudioEventMatcher::newKickEvent(const int& channel, const double& timeIn){
//	liveInput.addKickEvent(timeIn);
	matchNewOnsetEvent(channel, timeIn);
	likelihoodVisualisation[0] = bayesianStruct.likelihood;
}

// Snare onset on the default channel 2.
void AudioEventMatcher::newSnareEvent(const double& timeIn){
	matchNewOnsetEvent(2, timeIn);
	likelihoodVisualisation[2] = bayesianStruct.likelihood;
}

// Snare onset on an explicit channel.
// NOTE(review): the likelihood is still stored in slot 2 rather than
// [channel] - presumably callers always pass 2 here; confirm.
void AudioEventMatcher::newSnareEvent(const int& channel, const double& timeIn){
	matchNewOnsetEvent(channel, timeIn);
	likelihoodVisualisation[2] = bayesianStruct.likelihood;
}

//Needs just to set bounds for the matching process, not have TimeIn
// Matches a live onset against the recorded onsets of track `channel`: every
// recorded onset inside the current match window adds a Gaussian to the
// likelihood, a constant noise floor is added, and the posterior is updated.
void AudioEventMatcher::matchNewOnsetEvent(const int& channel, const double& timeIn){

	bayesianStruct.updateBayesianDistributions(timeIn);//moves the posterior up into prior given the time interval and calculates new offsets

	//start at beginning but OPTIMISE later
	const double onsetLikelihoodToNoise = 0.5;
	const double likelihoodWidth = 40;

	bayesianStruct.likelihood.offset = bayesianStruct.prior.offset;
	bayesianStruct.likelihood.zero();//set to zero

	const double quantity = 1;//likelihoodToNoiseRatio / numberOfMatches;
	int numberOfMatchesFound = 0;

	// Named so as NOT to shadow the member startTime (wall-clock playback
	// start), which is needed below for recentEventTime.
	const double windowStartTime = bayesianStruct.likelihood.offset;
	const double windowEndTime = bayesianStruct.likelihood.offset + matchWindowWidth;

	// Valid tracks are 0 .. numberOfAudioTracks-1 (the range draw code iterates).
	const bool channelIsValid = (channel >= 0 && channel < recordedTracks.numberOfAudioTracks);

	if (channelIsValid){
		const int onsetCount = static_cast<int>(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size());
		for (int i = 0; i < onsetCount; i++){
			const double millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
			if (millisTime >= windowStartTime && millisTime <= windowEndTime){
				bayesianStruct.likelihood.addGaussianShapeFromRealTime(millisTime, likelihoodWidth, quantity);
				numberOfMatchesFound++;
			//	printf("Adding Gaussian for onset at time %f offset %f\n", millisTime, bayesianStruct.likelihood.offset);
			}
		}
	}

	// With no matches the likelihood is all zero: the noise constant below
	// would also be zero, so renormalising and multiplying into the posterior
	// carries no information. Skip the update, as matchNewPitchEvent does.
	if (numberOfMatchesFound > 0){
	//	bayesianStruct.likelihood.addConstant((1-likelihoodToNoiseRatio)/bayesianStruct.likelihood.length);
		bayesianStruct.likelihood.addConstant(numberOfMatchesFound*(1-onsetLikelihoodToNoise)/(onsetLikelihoodToNoise*bayesianStruct.likelihood.length));
		bayesianStruct.likelihood.renormalise();

		bayesianStruct.calculatePosterior();
	}

	lastAlignmentTime = ofGetElapsedTimeMillis();
	if (channelIsValid)
		recentEventTime[channel] = ofGetElapsedTimeMillis() - startTime;//member startTime: ms since playback began
}

// Matches a live pitch against the recorded onsets of track `channel`: each
// recorded onset whose pitch passes checkMatch() adds a Gaussian weighted by
// getPitchDistance() and is flagged as matched; all others are flagged as
// unmatched. The posterior is only updated when at least one onset matched.
// NOTE(review): unlike matchNewOnsetEvent, likelihood.offset is not set from
// prior.offset here - confirm whether updateBayesianDistributions covers it.
void AudioEventMatcher::matchNewPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
	//start at beginning but OPTIMISE later

	bayesianStruct.updateBayesianDistributions(timeIn);//moves the posterior up into prior given the time interval and calculates new offsets

	//set the likelihoods by matching the pitched note

	const double pitchLikelihoodToNoise = 0.5;//more noise
	int numberOfMatches = 0;
	bayesianStruct.likelihood.zero();//set to zero

	// Valid tracks are 0 .. numberOfAudioTracks-1 (the range draw code iterates).
	const bool channelIsValid = (channel >= 0 && channel < recordedTracks.numberOfAudioTracks);

	if (channelIsValid){
		const int onsetCount = static_cast<int>(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size());
		for (int i = 0; i < onsetCount; i++){

			if (checkMatch(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn)) {
				// NOTE(review): checkMatch accepts differences up to 40 but the
				// distance scale is 10, so matches further than 10 apart get
				// weight 0 yet still count towards numberOfMatches.
				const double quantity = getPitchDistance(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn, 10);

				bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, 30, quantity);
				recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = true;
				numberOfMatches++;
			}
			else{
				recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = false;
			}
		}
	}

	recentPrior = bayesianStruct.prior;//kept for drawing

	if (numberOfMatches > 0){//no point updating unless there is a match
		bayesianStruct.likelihood.addConstant(numberOfMatches*(1-pitchLikelihoodToNoise)/(pitchLikelihoodToNoise*bayesianStruct.likelihood.length));

		//tmp set likelihood constant and calculate using that
		//bayesianStruct.likelihood.zero();
		//bayesianStruct.likelihood.addConstant(1);

		bayesianStruct.calculatePosterior();
	}

	lastAlignmentTime = ofGetElapsedTimeMillis();
	if (channelIsValid)
		recentEventTime[channel] = ofGetElapsedTimeMillis() - startTime;
}

// Returns a similarity in [0, 1]: 1 when the pitches are equal, falling
// linearly to 0 once they differ by `scale` or more.
double AudioEventMatcher::getPitchDistance(const double& pitchOne, const double& pitchTwo, const double& scale){

	// std::fabs rather than abs(): unqualified abs can bind to the integer
	// overload and truncate the difference.
	const double difference = std::fabs(pitchOne - pitchTwo);

	//	printf("[pitch distance %f vs %f = %f\n", pitchOne, pitchTwo, distance);
	return (difference < scale) ? 1 - (difference/scale) : 0;
}

// True when the recorded and live pitches differ by less than 40.
bool AudioEventMatcher::checkMatch(const double& recordedPitch, const double& livePitch){
	return std::fabs(recordedPitch - livePitch) < 40;
}

// Forwards a window resize to the recorded tracks and to every Bayesian
// display window laid out in setWindowDimensions().
void AudioEventMatcher::windowResized(const int& w, const int& h){
	recordedTracks.windowResized(w,h);
	bayesTempoWindow.resized(w,h);
	bayesPositionWindow.resized(w,h);
	bayesLikelihoodWindow.resized(w,h);//was missing: this window is drawn into as well
}