src/AudioEventMatcher.cpp @ 18:4ded82fe318d
"added pitch info at top of page"

author     Andrew N Robertson <andrew.robertson@eecs.qmul.ac.uk>
date       Tue, 07 Feb 2012 02:37:04 +0000
parents    c96b18dd0f48
children   1a62561bd72d
/*
 *  AudioEventMatcher.cpp
 *  MultipleAudioMatcher
 *
 *  Created by Andrew on 31/01/2012.
 *  Copyright 2012 QMUL. All rights reserved.
 *
 */

#include "AudioEventMatcher.h"

const int matchWindowWidth = 6000;

AudioEventMatcher::AudioEventMatcher(){
    pitchLikelihoodToNoise = 0.7;//more noise
    onsetLikelihoodToNoise = 0.5;
    onsetLikelihoodWidth = 10;//in ms
    setArraySizes();
    usingRealTime = false;
    bayesianStruct.realTimeMode = &usingRealTime;
    recentPitch = 0;
    currentAlignmentPosition = 0;
    followingLiveInput = true;
    startedPlaying = false;
}

void AudioEventMatcher::setWindowDimensions(){
    double startHeight = recordedTracks.numberOfAudioTracks * recordedTracks.trackScreenHeight;
    double heightAvailable = 1 - startHeight;
    heightAvailable /= 3.0;
    bayesPositionWindow.setToRelativeSize(0, startHeight, 1, heightAvailable);
    bayesLikelihoodWindow.setToRelativeSize(0, startHeight + 1*heightAvailable, 1, heightAvailable);
    bayesTempoWindow.setToRelativeSize(0, startHeight + 2*heightAvailable, 1, heightAvailable);
}

void AudioEventMatcher::setArraySizes(){
    bayesianStruct.resetSpeedSize(200);
    bayesianStruct.setRelativeSpeedScalar(0.01);
    bayesianStruct.setSpeedPrior(1.0);
    bayesianStruct.relativeSpeedPrior.getMaximum();
    bayesianStruct.resetSize(matchWindowWidth);
    bayesianStruct.setPositionDistributionScalar(1);
}

void AudioEventMatcher::loadAudioFiles(){
    recordedTracks.loadTestAudio();
    synchroniser.fileLengthSamples = recordedTracks.loadedAudioFiles[0].fileLoader.totalNumberOfSamples;
    printf("synchroniser has %f samples\n", synchroniser.fileLengthSamples);
}

void AudioEventMatcher::startPlaying(){
    bayesianStruct.setStartPlaying();
    currentAlignmentPosition = 0;
    startTime = ofGetElapsedTimeMillis();
    projectedPrior = bayesianStruct.prior;
    startedPlaying = true;
    synchroniser.reset();
    //bayesianStruct.posterior.printArray();
}

void AudioEventMatcher::stopPlaying(){
    startedPlaying = false;
}

void AudioEventMatcher::updatePosition(){
    if (!followingLiveInput)
        recordedTracks.updatePosition();
    else if (startedPlaying)
        recordedTracks.updatePositionToMillis(currentAlignmentPosition);
    updateBestAlignmentPosition();
}
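/*
 Illustrative usage sketch (editor's note, not part of this file): how a
 host openFrameworks app might drive the matcher, based on the methods
 defined above. The variable names (`matcher`, `timeMs`, `pitchHz`) are
 hypothetical.

    AudioEventMatcher matcher;
    matcher.loadAudioFiles();
    matcher.setWindowDimensions();
    matcher.startPlaying();
    // per frame:
    matcher.updatePosition();
    matcher.draw();
    // per detected live event (timestamps in ms):
    matcher.newKickEvent(0, timeMs);
    matcher.newPitchEvent(1, pitchHz, timeMs);
*/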
void AudioEventMatcher::updateBestAlignmentPosition(){
    //THIS DEALS WITH WHERE WE ARE NOW ON THE SCREEN
    //DIFFERENT TO WHEN EVENTS COME IN, AS THEY ARE TIMESTAMPED - SO E.G. A PITCH
    //EVENT MAY ARRIVE 16 CHROMA FRAMES LATER - BIG DIFFERENCE
    int newTime = ofGetElapsedTimeMillis() - startTime;
//  double tmp = bayesianStruct.posterior.getIndexInRealTerms(bayesianStruct.posterior.MAPestimate);
//  double timetmp = (newTime - lastAlignmentTime);
//  double speedtmp = bayesianStruct.relativeSpeedPosterior.getIndexInRealTerms(bayesianStruct.relativeSpeedPosterior.MAPestimate);
//  currentAlignmentTime = newTime;
    currentAlignmentPosition = bayesianStruct.posterior.getIndexInRealTerms(bayesianStruct.posterior.MAPestimate);
    currentAlignmentPosition += (newTime - lastAlignmentTime) * bayesianStruct.relativeSpeedPosterior.getIndexInRealTerms(bayesianStruct.relativeSpeedPosterior.MAPestimate);

    synchroniser.updateRecordedPosition(currentAlignmentPosition, newTime);
    synchroniser.updateOutputSpeed();

    bayesianStruct.projectDistribution(newTime, currentAlignmentPosition, projectedPrior);//prior gets updated to where we are now

//  printf("ALIGN pos %f time diff %f (now %f , last %f) speed %f :: ALIGN BEST %f\n", tmp, timetmp, (double)ofGetElapsedTimeMillis(), lastAlignmentTime, speedtmp, currentAlignmentPosition);
}

void AudioEventMatcher::draw(){
    //draw some outlines in blue
    ofSetColor(20,200,200);
    bayesPositionWindow.drawOutline();
    bayesTempoWindow.drawOutline();

    //draw the scrolling audio tracks
    recordedTracks.drawTracks();
    ofSetColor(255);
//  bayesianStruct.relativeSpeedPrior.drawVector(0, 200, bayesTempoWindow);

    setScreenDisplayTimes();
    drawBayesianDistributions();
//  bayesianStruct.posterior.drawVector(0, bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);
//  bayesianStruct.posterior.drawVector(bayesianStruct.posterior.getRealTermsAsIndex(0), bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);
//  bayesianStruct.relativeSpeedPosterior.drawVector(0, bayesianStruct.relativeSpeedPosterior.getRealTermsAsIndex(2), bayesTempoWindow);

    string tmpStr = "pitch "+ofToString(recentPitch, 2);
    tmpStr += " Nearest "+ofToString(pitchOfNearestMatch, 2);
    tmpStr += " dist "+ofToString(distanceOfNearestMatch, 2);
    tmpStr += ", Time "+ofToString(recentTime, 0);
    ofDrawBitmapString(tmpStr, 20, 20);

    string alignString = " align "+ofToString(currentAlignmentPosition, 2);
    alignString += " playing "+ofToString(synchroniser.playingPositionRatio, 5);
    alignString += " pos "+ofToString(synchroniser.playingPositionMillis, 0)+" ms";
    alignString += " rec pos "+ofToString(synchroniser.recordedPositionMillis, 0)+" ms";
    ofDrawBitmapString(alignString, 20, 50);

    ofDrawBitmapString("pos "+ofToString(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.playPosition), 200, 600);
}

void AudioEventMatcher::setScreenDisplayTimes(){
    screenWidthMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.amplitudeNumber);
//  if (!followingLiveInput){
    screenStartTimeMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.drawParams.windowStartFrame);
    screenEndTimeMillis = screenStartTimeMillis + screenWidthMillis;//need PRECISION in this alignment
    /*}else{
        screenStartTimeMillis = (int)(currentAlignmentPosition/screenWidthMillis) * screenWidthMillis;
        screenEndTimeMillis = screenStartTimeMillis + screenWidthMillis;
    }*/
}
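/*
 Sketch of the alignment extrapolation in updateBestAlignmentPosition()
 above (a reading of the code, with illustrative numbers): the MAP estimate
 of the position posterior gives the last inferred position in the recorded
 material, which is then projected forward at the MAP relative speed:

    position(now) = MAP(posterior) + (now - lastAlignmentTime) * MAP(speed)

 e.g. a MAP position of 12000 ms, 40 ms elapsed since the last matched
 event, and a relative speed of 1.05 give 12000 + 40 * 1.05 = 12042 ms.
*/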
void AudioEventMatcher::drawBayesianDistributions(){
    int startIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenStartTimeMillis);
    int endIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenEndTimeMillis);
    bayesianStruct.posterior.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesPositionWindow);

    string tmpString = "start "+ofToString(screenStartTimeMillis)+" (index "+ofToString(startIndex)+"), end "+ofToString(screenEndTimeMillis);
    ofDrawBitmapString(tmpString, bayesPositionWindow.x+20, bayesPositionWindow.y+20);

//  bayesianStruct.likelihood.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesLikelihoodWindow);
    bayesianStruct.relativeSpeedPosterior.drawConstrainedVector(0, bayesianStruct.relativeSpeedPosterior.arraySize, 0, ofGetWidth(), bayesTempoWindow);

    string tmpStr = "zero is "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(0));
    tmpStr += " offset is "+ofToString(bayesianStruct.posterior.offset);
    tmpStr += " screenWidth = "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis));
    ofDrawBitmapString(tmpStr, 20, 140);

    tmpStr = "best est "+ofToString(bayesianStruct.bestEstimate);
    ofDrawBitmapString(tmpStr, 20, 180);

    ofDrawBitmapString("screenwidth "+ofToString(screenWidthMillis), 20, 800);

    ofSetColor(0,255,0);//green scrolling line at current best position
    double currentEstimateIndex = (currentAlignmentPosition - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
    ofLine(currentEstimateIndex, bayesPositionWindow.y, currentEstimateIndex, bayesPositionWindow.y + bayesPositionWindow.height);

    ofSetColor(0,255,255);//synchroniser position
    currentEstimateIndex = (synchroniser.playingPositionMillis - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
    ofLine(currentEstimateIndex, bayesLikelihoodWindow.y, currentEstimateIndex, bayesLikelihoodWindow.y + bayesPositionWindow.height);

    //draw track by track likelihoods
    for (int i = 0; i < recordedTracks.numberOfAudioTracks; i++){
        ofSetColor(200,255,50);//channel likelihoods in yellow
        likelihoodVisualisation[i].drawConstrainedVector(likelihoodVisualisation[i].getRealTermsAsIndex(screenStartTimeMillis), likelihoodVisualisation[i].getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window);

        ofSetColor(0,255,150);//channel priors
        recentPriors[i].drawConstrainedVector(recentPriors[i].getRealTermsAsIndex(screenStartTimeMillis), recentPriors[i].getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window);

        ofSetColor(255);
        ofDrawBitmapString("recent event "+ofToString(recentEventTime[i]),
            recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.x + 20,
            recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.y + recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.height - 10);
    }

    int priorStartIndex = bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis);
    int priorEndIndex = bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis);
//  ofSetColor(0,200,200);//recent prior
//  recentPrior.drawConstrainedVector(priorStartIndex, priorEndIndex, 0, ofGetWidth(), bayesPositionWindow);

    ofSetColor(255,0,100);//purple prior
    bayesianStruct.prior.drawConstrainedVector(priorStartIndex, priorEndIndex, 0, ofGetWidth(), bayesPositionWindow);

    ofSetColor(255,0,0);
    projectedPrior.drawConstrainedVector(priorStartIndex, priorEndIndex, 0, ofGetWidth(), bayesPositionWindow);
}
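/*
 The screen-space mapping used by the drawing code above: a time t in
 milliseconds maps to the x coordinate

    x = (t - screenStartTimeMillis) * ofGetWidth() / screenWidthMillis;

 e.g. with a 10000 ms window starting at 30000 ms on a 1024 px wide screen,
 an event at 35000 ms draws at x = 5000 * 1024 / 10000 = 512 px.
*/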
void AudioEventMatcher::newPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
    if (pitchIn > 0){
        liveInput.addPitchEvent(pitchIn, timeIn);
        //printPosteriorMAPinfo();
        matchNewPitchEvent(channel, pitchIn, timeIn);//main pitch matching fn
        likelihoodVisualisation[1] = bayesianStruct.likelihood;
        recentPitch = pitchIn;//for drawing
        recentTime = timeIn;
    }
}

void AudioEventMatcher::newKickEvent(const double& timeIn){
//  liveInput.addKickEvent(timeIn);
    matchNewOnsetEvent(0, timeIn);
    likelihoodVisualisation[0] = bayesianStruct.likelihood;
}

void AudioEventMatcher::newKickEvent(const int& channel, const double& timeIn){
//  liveInput.addKickEvent(timeIn);
    matchNewOnsetEvent(channel, timeIn);
    likelihoodVisualisation[0] = bayesianStruct.likelihood;
}

void AudioEventMatcher::newSnareEvent(const double& timeIn){
    matchNewOnsetEvent(2, timeIn);
    likelihoodVisualisation[2] = bayesianStruct.likelihood;
}

void AudioEventMatcher::newSnareEvent(const int& channel, const double& timeIn){
    matchNewOnsetEvent(channel, timeIn);
    likelihoodVisualisation[2] = bayesianStruct.likelihood;
}

//Needs just to set bounds for the matching process, not to take timeIn
void AudioEventMatcher::matchNewOnsetEvent(const int& channel, const double& timeIn){
    bayesianStruct.updateBayesianDistributions(timeIn);//moves the posterior up into the prior given the time interval and calculates new offsets

    //start at beginning but OPTIMISE later
    bayesianStruct.likelihood.offset = bayesianStruct.prior.offset;
    bayesianStruct.likelihood.zero();//set to zero

    double quantity = 1;//likelihoodToNoiseRatio / numberOfMatches;
    int numberOfMatchesFound = 0;
    double startMatchingTime = bayesianStruct.likelihood.offset;
    double endMatchingTime = bayesianStruct.likelihood.offset + matchWindowWidth;

    if (channel < recordedTracks.numberOfAudioTracks){//bounds check: channels are zero-indexed
        for (int i = 0; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size(); i++){
            double millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (millisTime >= startMatchingTime && millisTime <= endMatchingTime){
                bayesianStruct.likelihood.addGaussianShapeFromRealTime(millisTime, onsetLikelihoodWidth, quantity);
                numberOfMatchesFound++;
            //  printf("Adding Gaussian for onset at time %f offset %f\n", millisTime, bayesianStruct.likelihood.offset);
            }
        }
    }

    if (numberOfMatchesFound > 0){
    //  bayesianStruct.likelihood.addConstant((1-likelihoodToNoiseRatio)/bayesianStruct.likelihood.length);
        bayesianStruct.likelihood.addConstant(numberOfMatchesFound*(1-onsetLikelihoodToNoise)/(onsetLikelihoodToNoise*bayesianStruct.likelihood.length));
        bayesianStruct.likelihood.renormalise();
        bayesianStruct.calculatePosterior();
        lastAlignmentTime = timeIn;//use TIMESTAMP
        recentEventTime[channel] = timeIn;//ofGetElapsedTimeMillis() - startTime;
        recentPriors[channel] = bayesianStruct.prior;
        projectedPrior = bayesianStruct.prior;
    }
}
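/*
 Sketch of the likelihood model implemented in matchNewOnsetEvent() above
 (a reading of the code, assuming addGaussianShapeFromRealTime() adds unit
 mass per onset): each recorded onset inside the match window contributes a
 Gaussian at its time, and addConstant() then adds a uniform noise floor so
 that Gaussian mass and noise mass stand in the ratio r : (1 - r), with
 r = onsetLikelihoodToNoise. With n matches, the constant per bin is

    n * (1 - r) / (r * length)

 which sums to n * (1 - r) / r over the array, i.e. (1 - r)/r times the n
 units of Gaussian mass, before renormalise() scales the total to 1.
*/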
void AudioEventMatcher::matchNewPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
    //start at beginning but OPTIMISE later
    /*printf("TIME %i\n", ofGetElapsedTimeMillis());
    //tmp debug
    updateBestAlignmentPosition();
    printf("current alignment best estimate %f\n", currentAlignmentPosition);
    */
    bayesianStruct.updateBayesianDistributions(timeIn);//moves the posterior up into the prior given the time interval and calculates new offsets

    //set the likelihoods by matching the pitched note
    int numberOfMatches = 0;
    bayesianStruct.likelihood.zero();//set to zero
    double newOnsetTime;
    double closestDistance = INFINITY;
    double quantity = 0;

    if (channel < recordedTracks.numberOfAudioTracks){//bounds check: channels are zero-indexed
        for (int i = 0; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size(); i++){
            if (checkMatch(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn)){
                quantity = getPitchDistance(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn, 8);
                bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, 30, quantity);
                recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = true;
                numberOfMatches++;
            }
            else{
                recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = false;
            }

            //checking nearest pitch
            newOnsetTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (fabs(newOnsetTime - currentAlignmentPosition) < closestDistance){
                closestDistance = fabs(newOnsetTime - currentAlignmentPosition);
                pitchOfNearestMatch = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch;
                distanceOfNearestMatch = quantity;
            }
        }
    }

    if (numberOfMatches > 0){//no point updating unless there is a match
        bayesianStruct.likelihood.addConstant(numberOfMatches*(1-pitchLikelihoodToNoise)/(pitchLikelihoodToNoise*bayesianStruct.likelihood.length));
        //tmp set likelihood constant and calculate using that
        //bayesianStruct.likelihood.zero();
        //bayesianStruct.likelihood.addConstant(1);
        bayesianStruct.calculatePosterior();
        lastAlignmentTime = timeIn;//has to use the STAMPED time
        recentEventTime[channel] = timeIn;
        recentPriors[channel] = bayesianStruct.prior;
        projectedPrior = bayesianStruct.prior;
    }
}

double AudioEventMatcher::getPitchDistance(const double& pitchOne, const double& pitchTwo, const double& scale){
    double scaleFactor = scale * pitchOne / 110.0;
    int multiplicationFactor = 1;
    if (pitchTwo > 0){
        multiplicationFactor = (int)round(pitchOne/pitchTwo);//was redeclared here, shadowing the outer variable so it never took effect
    }
    double distance = fabs(pitchOne - pitchTwo*multiplicationFactor);

    if (distance < scaleFactor)
        distance = 1 - (distance/scaleFactor);
    else
        distance = 0;

//  printf("pitch distance %f vs %f = %f\n", pitchOne, pitchTwo, distance);
    return distance;
}

bool AudioEventMatcher::checkMatch(const double& recordedPitch, const double& livePitch){
    if (livePitch > 0){
        int multiplicationFactor = (int)(round(recordedPitch/livePitch));
        if (fabs(recordedPitch - livePitch * multiplicationFactor) < 16)
            return true;
        else
            return false;
    }else{
        return false;
    }
}

void AudioEventMatcher::windowResized(const int& w, const int& h){
    recordedTracks.windowResized(w, h);
    bayesTempoWindow.resized(w, h);
    bayesPositionWindow.resized(w, h);
}

/*
void printPosteriorMAPinfo(){
    //tmp print stuff
    printf("New pitch MAP post estimate now %i, ", bayesianStruct.posterior.MAPestimate);
    double tmp = bayesianStruct.posterior.getMAPestimate();
    printf(" getting it %f and offset %f == %f ms\n", tmp, bayesianStruct.posterior.offset, bayesianStruct.posterior.getIndexInRealTerms(tmp));
}
*/
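/*
 Worked example of the octave-tolerant pitch matching above (illustrative
 numbers only): for a live pitch of 110 Hz against a recorded onset at
 221 Hz, checkMatch() rounds 221/110 to multiplicationFactor = 2 and tests
 |221 - 110*2| = 1 < 16, so the onset counts as a match. getPitchDistance()
 then scores the same error against scaleFactor = 8 * 221/110.0 = 16.07,
 giving a likelihood weight of 1 - 1/16.07 = 0.94; closer pitch matches
 therefore add taller Gaussians to the likelihood.
*/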