src/AudioEventMatcher.cpp @ 56:4394c9490716 (tip) — "minor changes"

author:  Andrew N Robertson <andrew.robertson@eecs.qmul.ac.uk>
date:    Mon, 24 Dec 2012 18:58:39 +0000
parents: 2eca10a31ae2
/*
 *  AudioEventMatcher.cpp
 *  MultipleAudioMatcher
 *
 *  Created by Andrew on 31/01/2012.
 *  Copyright 2012 QMUL. All rights reserved.
 *
 */

#include "AudioEventMatcher.h"

//why are there two Tempo Follower class objects?

const int matchWindowWidth = 8000; //ms within which to match
const float pitchCutOff = 16;      //within which pitches are even considered
const double pitchWidth = 12;

bool printInfo = false;

AudioEventMatcher::AudioEventMatcher(){
    ofBackground(0);
    useChromaDotProduct = false; //false for most tests
    printingData = false;
    updateTempoMethodOn = false;

    pitchLikelihoodToNoise = 0.6; //more noise
    pitchLikelihoodWidth = 30;

    chromaLikelihoodToNoise = 0.5; //lower => more noise, higher => more weight for events
    chromaLikelihoodWidth = 50;    //ms around the onset event

    //onsetLikelihoodToNoise = 0.2; //0.1 and 10 as of 9/5/12
    kickLikelihoodToNoise = 0.3;
    snareLikelihoodToNoise = 0.1;
    onsetLikelihoodWidth = 6; //in ms

    setArraySizes();

    usingRealTime = false;
    bayesianStruct.realTimeMode = &usingRealTime;

    recentPitch = 0;
    currentAlignmentPosition = 0;
    followingLiveInput = true;
    startedPlaying = false;
    recordedTempoIndex = 0;

    bayesianStruct.startingWindowWidth = 100; //matchWindowWidth / 8;
    bayesianStruct.matchWindowWidth = matchWindowWidth;

    drawLikelihoods = true;
    drawPosterior = false;

    temporal.printOutput = true; //printInfo;
    // temporal.setUpEventTimeMatrix();
    // recordedTempoData.setUpEventTimeMatrix();
}

void AudioEventMatcher::setWindowDimensions(){
    double startHeight = recordedTracks.numberOfAudioTracks * recordedTracks.trackScreenHeight;
    double heightAvailable = 1 - startHeight;
    heightAvailable /= numberOfChannels;

    bayesPositionWindow.setToRelativeSize(0, startHeight, 1, heightAvailable);
    bayesLikelihoodWindow.setToRelativeSize(0, startHeight + 1*heightAvailable, 1, heightAvailable);
    bayesTempoWindow.setToRelativeSize(0, startHeight + 2*heightAvailable, 1, heightAvailable);
}

void AudioEventMatcher::setArraySizes(){
    bayesianStruct.resetSpeedSize(200);
    bayesianStruct.setRelativeSpeedScalar(0.01);
    bayesianStruct.setSpeedPrior(1.0);
    bayesianStruct.relativeSpeedPrior.getMaximum();

    float scalarForBayesianDistribution = 2;
    bayesianStruct.resetSize(matchWindowWidth / scalarForBayesianDistribution);
    bayesianStruct.setPositionDistributionScalar(2);
}

void AudioEventMatcher::loadAudioFiles(){
    recordedTracks.loadTestAudio();
    synchroniser.fileLengthSamples = recordedTracks.loadedAudioFiles[0].fileLoader.totalNumberOfSamples;
    printf("synchroniser has %f samples\n", synchroniser.fileLengthSamples);

    printf("FIRST PASS\n");
    calculateRecordedTempoData();
    recordedTempoData.printTempoTimes();
    printf("\n audioeventmatcher\nFIRST PASS: FINAL recorded tempo is %f\n", recordedTempoData.playingTempo);

    printf("SECOND PASS\n");
    setTempoPrior(recordedTempoData.playingTempo);
    calculateRecordedTempoData(); //now calculate again using the better prior
    printf("\n audioeventmatcher\nSECOND PASS: FINAL recorded tempo is %f\n", recordedTempoData.playingTempo);

    printf("GLOBAL TEMPO of RECORDED FILES\n");
    recordedTempoData.printTempoTimes();
}

void AudioEventMatcher::setTempoPrior(double tempo){
    recordedTempoData.zero();
    recordedTempoData.tempoPosterior.zero();
    recordedTempoData.tempoPosterior.addGaussianShapeFromRealTime(tempo, 3, 1);
}
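// setTempoPrior() above seeds the tempo distribution with a single Gaussian
// bump via addGaussianShapeFromRealTime(mean, width, weight). A minimal
// self-contained sketch of that operation, assuming the distribution is a
// plain array of bins with a known real-terms origin and step (the names
// here are illustrative, not the project's DynamicVector API):
#include <vector>
#include <cmath>

static void addGaussianBump(std::vector<double>& dist, double binOrigin, double binStep,
                            double mean, double stdDev, double weight){
    for (size_t i = 0; i < dist.size(); i++){
        double x = binOrigin + binStep * i;         //real-terms value of bin i
        double d = (x - mean) / stdDev;
        dist[i] += weight * std::exp(-0.5 * d * d); //unnormalised Gaussian bump
    }
}
// e.g. setTempoPrior(tempo) then corresponds to zeroing the array and calling
// addGaussianBump(tempoPosterior, origin, step, tempo, 3.0, 1.0);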
void AudioEventMatcher::calculateRecordedTempoData(){
    int indexForOnsets[3];
    indexForOnsets[0] = 0;
    indexForOnsets[1] = 0;
    indexForOnsets[2] = 0;

    int kickTime = 0;
    int snareTime = 0;

    while (indexForOnsets[0] < recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.chromaOnsets.size()
        || indexForOnsets[2] < recordedTracks.loadedAudioFiles[2].fileLoader.onsetDetect.chromaOnsets.size()) {

        setNextOnsetTime(0, kickTime, &indexForOnsets[0]);
        setNextOnsetTime(2, snareTime, &indexForOnsets[0]);

        if (kickTime < snareTime){
            printf("kick(%i) at %i\n", indexForOnsets[0], kickTime);
            recordedTempoData.updateTempo(0, kickTime);
            // printf("recorded tempo is %f\n", recordedTempoData.playingTempo);
            indexForOnsets[0]++;
        } else {
            printf("snare(%i) at %i\n", indexForOnsets[2], snareTime);
            recordedTempoData.updateTempo(2, snareTime);
            // printf("recorded tempo is %f\n", recordedTempoData.playingTempo);
            indexForOnsets[2]++;
        }
    }//end while
}

void AudioEventMatcher::setNextOnsetTime(const int& channel, int& time, int* indexForOnsets){
    if (indexForOnsets[channel] < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()){
        time = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[indexForOnsets[channel]].millisTime;
    } else {
        time = 2147483647; //i.e. INT_MAX: no onsets left on this channel
    }
}
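// calculateRecordedTempoData() above is a two-pointer merge: the kick
// (channel 0) and snare (channel 2) onset lists are each sorted by time, and
// an exhausted list is given a sentinel time of INT_MAX, so updateTempo()
// sees all onsets in global time order. A minimal sketch of the same pattern
// over two sorted vectors:
#include <vector>
#include <climits>

static void mergeSortedOnsets(const std::vector<int>& kicks, const std::vector<int>& snares){
    size_t k = 0, s = 0;
    while (k < kicks.size() || s < snares.size()){
        int kickTime  = (k < kicks.size())  ? kicks[k]  : INT_MAX; //sentinel when exhausted
        int snareTime = (s < snares.size()) ? snares[s] : INT_MAX;
        if (kickTime < snareTime){
            //process the kick onset, e.g. updateTempo(0, kickTime)
            k++;
        } else {
            //process the snare onset, e.g. updateTempo(2, snareTime)
            s++;
        }
    }
}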
void AudioEventMatcher::startPlaying(){
    bayesianStruct.setStartPlaying();
    currentAlignmentPosition = 0;
    startTime = ofGetElapsedTimeMillis();
    projectedPrior = bayesianStruct.prior;
    startedPlaying = true;
    synchroniser.reset();
    temporal.reset();

    recordedTempoIndex = 0;
    recordedTempo = recordedTempoData.globalTempo[recordedTempoIndex];
    printf("recorded tempo is %f\n", recordedTempo);
    currentSpeedRatio = 1;
    relativeTempo = 1;

    temporal.reset();
    // temporal.tempoPosterior.zero();
    // temporal.tempoPosterior.addGaussianShapeFromRealTime(recordedTempo, 2000, 1);

    //set the tempo prior for the speed ratio, then update it
    setSpeedRatioDistribution(currentSpeedRatio);

    euclideanMaximumDistance = 0;
    //bayesianStruct.posterior.printArray();
}

void AudioEventMatcher::setSpeedRatioDistribution(const double& speedRatio){
    //this is the speed combination actually used
    bayesianStruct.relativeSpeedPosterior.zero();
    // bayesianStruct.relativeSpeedPosterior.addToIndex(bayesianStruct.relativeSpeedPosterior.getRealTermsAsIndex(speedRatio), 1);
    bayesianStruct.relativeSpeedPosterior.addGaussianShapeFromRealTime(speedRatio, 0.1, 3);
    bayesianStruct.relativeSpeedPosterior.addGaussianShapeFromRealTime(speedRatio, 0.02, 2);
}

void AudioEventMatcher::stopPlaying(){
    startedPlaying = false;
    testDistributionOutput.closeFile();
    //temporal.printEventTimes();
}

void AudioEventMatcher::rescue(){
    bayesianStruct.posterior.zero();
    bayesianStruct.posterior.addConstant(1);
    bayesianStruct.prior.zero();
    bayesianStruct.prior.addConstant(1);
}

#pragma mark -update

void AudioEventMatcher::updatePosition(){
    if (startedPlaying){
        /*
        if (!followingLiveInput)
            recordedTracks.updatePosition();
        else
        */
        recordedTracks.updatePositionToMillis(currentAlignmentPosition);
        updateBestAlignmentPosition();
    } else {
        recordedTracks.updatePosition();
        markerPlaybackPosition = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.playPositionFrames);
        currentAlignmentPosition = markerPlaybackPosition;
    }

    if (updateTempoMethodOn){
        updateRecordedTempo();
    }
}

void AudioEventMatcher::updateRecordedTempo(){
    //tempo of the equivalent recorded position is updated
    if (recordedTempoIndex < recordedTempoData.globalTempoTimes.size()){ //if for debug
        recordedTempo = getRecordedTempoAtMillis(currentAlignmentPosition);
        double tmpRatio = currentSpeedRatio;
        currentSpeedRatio = temporal.playingTempo / recordedTempo;
        if (currentSpeedRatio != tmpRatio)
            setSpeedRatioDistribution(currentSpeedRatio);
    }//end if to prevent debug crash

    temporal.tempoPosterior.addGaussianShape(temporal.tempoPosterior.MAPestimate, temporal.tempoArraySize / 4, 0.5);
}

double AudioEventMatcher::getRecordedTempoAtMillis(const double& millisPosition){
    while (currentAlignmentPosition < recordedTempoData.globalTempoTimes[recordedTempoIndex] && recordedTempoIndex > 0){
        //this loop is never used when playback is sequential, as we expect the alignment
        //time to be ahead of the last recorded tempo point - but just in case
        recordedTempoIndex--;
    }
    while (currentAlignmentPosition > recordedTempoData.globalTempoTimes[recordedTempoIndex]){
        recordedTempoIndex++;
    }
    return recordedTempoData.globalTempo[recordedTempoIndex];
}

void AudioEventMatcher::updateBestAlignmentPosition(){
    //THIS DEALS WITH WHERE WE ARE NOW, ON THE SCREEN.
    //DIFFERENT TO WHEN EVENTS COME IN, AS THEY ARE TIMESTAMPED - SO E.G. A PITCH
    //EVENT MAY ARRIVE 16 CHROMA FRAMES LATER - BIG DIFFERENCE
    int newTime = getTimeNow();

    // double tmp = bayesianStruct.posterior.getIndexInRealTerms(bayesianStruct.posterior.MAPestimate);
    // double timetmp = (newTime - lastAlignmentTime);
    // double speedtmp = bayesianStruct.relativeSpeedPosterior.getIndexInRealTerms(bayesianStruct.relativeSpeedPosterior.MAPestimate);
    // currentAlignmentTime = newTime;

    currentAlignmentPosition = bayesianStruct.posterior.getIndexInRealTerms(bayesianStruct.posterior.MAPestimate);
    currentAlignmentPosition += (newTime - lastAlignmentTime) * bayesianStruct.relativeSpeedPosterior.getIndexInRealTerms(bayesianStruct.relativeSpeedPosterior.MAPestimate);

    synchroniser.updateRecordedPosition(currentAlignmentPosition, newTime);
    synchroniser.updateOutputSpeed();

    bayesianStruct.projectDistribution(newTime, currentAlignmentPosition, projectedPrior); //prior gets updated to where we are now

    // printf("updateBestAlignment:: alignment %i:: %i\n", newTime, (int) currentAlignmentPosition);
    // printf("ALIGN pos %f time diff %f (now %f , last %f) speed %f :: ALIGN BEST %f\n", tmp, timetmp, (double)ofGetElapsedTimeMillis(), lastAlignmentTime, speedtmp, currentAlignmentPosition);
}

int AudioEventMatcher::getTimeNow(){
    return ofGetElapsedTimeMillis() - startTime;
}
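// updateBestAlignmentPosition() above extrapolates linearly between events:
// it takes the posterior's MAP position and projects it forward by the time
// elapsed since the last matched event, scaled by the MAP relative speed.
// A sketch of that arithmetic, with illustrative names:
static double projectAlignment(double mapPositionMs, double lastEventTimeMs,
                               double nowMs, double mapSpeedRatio){
    //e.g. MAP at 12000 ms, 250 ms since the last event, speed ratio 1.04:
    //12000 + 250 * 1.04 = 12260 ms into the recording
    return mapPositionMs + (nowMs - lastEventTimeMs) * mapSpeedRatio;
}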
#pragma mark -markers

void AudioEventMatcher::addMarkerNow(){
    if (!startedPlaying)
        markedPoints.addMarker(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.playPositionFrames));
}

void AudioEventMatcher::deleteMarkers(){
    markedPoints.markers.clear();
}

void AudioEventMatcher::moveToNextMarker(){
    int m = 0;
    while (m < markedPoints.markers.size() && markedPoints.markers[m] < currentAlignmentPosition)
        m++;

    if (m < markedPoints.markers.size() && markedPoints.markers[m] > currentAlignmentPosition){
        setPlaybackPosition(markedPoints.markers[m]);
        printf("move to marker %f from current pos %f\n", markedPoints.markers[m], currentAlignmentPosition);
    }
}

void AudioEventMatcher::moveToPreviousMarker(){
    int m = 0;
    while (m < markedPoints.markers.size() && markedPoints.markers[m] < currentAlignmentPosition)
        m++;

    if (m > 1 && markedPoints.markers[m-1] + 300 > currentAlignmentPosition)
        setPlaybackPosition(markedPoints.markers[m-2]);
    else if (m > 0 && markedPoints.markers[m-1] < currentAlignmentPosition){
        setPlaybackPosition(markedPoints.markers[m-1]);
        printf("move to marker %f from current pos %f\n", markedPoints.markers[m-1], currentAlignmentPosition);
    }
}

void AudioEventMatcher::deleteNearestMarker(){
    int m = 0;
    while (m < markedPoints.markers.size() && markedPoints.markers[m] < currentAlignmentPosition)
        m++;

    //m is now the first marker at or after the alignment position;
    //delete whichever of markers[m-1] and markers[m] is closer
    int markerToDelete = m;
    if (m > 0 && (m == markedPoints.markers.size()
        || fabs(markedPoints.markers[m-1] - currentAlignmentPosition) < fabs(markedPoints.markers[m] - currentAlignmentPosition))){
        markerToDelete = m-1;
    }
    if (markerToDelete < markedPoints.markers.size())
        markedPoints.deleteMarker(markerToDelete);
}

void AudioEventMatcher::setPlaybackPosition(const double& millis){
    if (!startedPlaying)
        recordedTracks.updatePlaybackPositionToMillis(millis);
}

void AudioEventMatcher::goToMarker(const int& markerID){
    if (startedPlaying && markerID >= 0 && markerID < markedPoints.markers.size()){
        double markPosition = markedPoints.markers[markerID];
        setToPosition(markPosition);
    }
}

void AudioEventMatcher::mousePressed(const int& x){
    if (!startedPlaying){
        double position = (float) x * screenWidthMillis / ofGetWidth();
        position += screenStartTimeMillis;
        setPlaybackPosition(position);
    }
}

void AudioEventMatcher::setToPosition(const double& position){
    setNewLimits(position);
    bayesianStruct.posterior.zero();
    bayesianStruct.zeroDistributionAtPosition(bayesianStruct.posterior, position);
    bayesianStruct.posterior.addGaussianShapeFromRealTime(position, 100, 1);
    bayesianStruct.prior.zero();
    bayesianStruct.zeroDistributionAtPosition(bayesianStruct.prior, position);
    bayesianStruct.prior.addGaussianShapeFromRealTime(position, 100, 1);
    // bayesianStruct.posterior.addConstant(0.1);
    // bayesianStruct.prior.zero();
    // bayesianStruct.prior.addConstant(1);
}

void AudioEventMatcher::setNewLimits(const double& position){
    //update the distribution to the new limits
    double difference = position - (bayesianStruct.prior.getIndexInRealTerms(bayesianStruct.prior.length/2));
    bayesianStruct.prior.offset += difference;
    bayesianStruct.likelihood.offset += difference;
    bayesianStruct.posterior.offset += difference;

    bayesianStruct.prior.zero();
    bayesianStruct.posterior.zero();
    bayesianStruct.likelihood.zero();
}
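// The marker-navigation functions above all begin with the same linear scan
// for the first marker at or after the current position. Because the marker
// list is kept sorted by time, std::lower_bound expresses the same search;
// a sketch assuming the markers are held in a sorted std::vector<double>:
#include <vector>
#include <algorithm>

static int firstMarkerAtOrAfter(const std::vector<double>& markers, double positionMillis){
    //index of the first marker >= positionMillis, or markers.size() if none
    return (int)(std::lower_bound(markers.begin(), markers.end(), positionMillis) - markers.begin());
}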
#pragma mark -draw

void AudioEventMatcher::draw(){
    //MAIN DRAW FUNCTION FOR ALL
    //ofBackground(255,255,255);

    //draw some outlines in blue
    ofSetColor(20,200,200);
    // bayesPositionWindow.drawOutline();
    // bayesTempoWindow.drawOutline();

    //draw the scrolling audio tracks
    recordedTracks.drawTracks();
    ofSetColor(255);
    // bayesianStruct.relativeSpeedPrior.drawVector(0, 200, bayesTempoWindow);

    setScreenDisplayTimes();
    drawBayesianDistributions();

    //bayesianStruct.posterior.drawVector(0, bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);
    //bayesianStruct.posterior.drawVector(bayesianStruct.posterior.getRealTermsAsIndex(0), bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis), bayesPositionWindow);
    //bayesianStruct.relativeSpeedPosterior.drawVector(0, bayesianStruct.relativeSpeedPosterior.getRealTermsAsIndex(2), bayesTempoWindow);

    //tempo
    //temporal.drawTempoArray(bayesLikelihoodWindow);

    if (printInfo){
        drawRecordedTempo();
        drawPlayingTempo();
        drawAlignmentTimes();
    }

    drawMarkers();
}

void AudioEventMatcher::drawRecordedTempo(){
    int xTempoIndex = ofGetWidth() * (double)(recordedTempo - recordedTempoData.minimumTempoInterval)/(double)(recordedTempoData.maximumTempoInterval - recordedTempoData.minimumTempoInterval);
    ofSetColor(0, 200, 0);
    ofLine(xTempoIndex, bayesLikelihoodWindow.y, xTempoIndex, bayesLikelihoodWindow.y + bayesLikelihoodWindow.height);
    ofDrawBitmapString(ofToString(recordedTempo), xTempoIndex, bayesLikelihoodWindow.y + 10);
}

void AudioEventMatcher::drawPlayingTempo(){
    //purple line for the MAP estimate of new intervals
    int xTempoIndex = (double)(ofGetWidth() * (temporal.playingTempo - temporal.minimumTempoInterval))/(double)(temporal.maximumTempoInterval - temporal.minimumTempoInterval);
    ofSetColor(200, 0, 200);
    ofLine(xTempoIndex, bayesLikelihoodWindow.y, xTempoIndex, bayesLikelihoodWindow.y + bayesLikelihoodWindow.height);
    ofDrawBitmapString(ofToString(temporal.playingTempo), xTempoIndex, bayesLikelihoodWindow.y + 10);

    //red line where the ratio between the playing tempo and the recorded one lies
    int xSpeedRatioIndex = (double)(temporal.tempoPosterior.getIndexInRealTerms(currentSpeedRatio)*ofGetWidth())/(double)temporal.tempoPosterior.arraySize;
    ofSetColor(200,0,0);
    ofLine(xSpeedRatioIndex, bayesTempoWindow.y, xSpeedRatioIndex, bayesTempoWindow.y + bayesTempoWindow.height);

    if (printInfo){
        string tmpString = "playing "+ofToString(temporal.playingTempo);
        tmpString += ", recorded "+ofToString(recordedTempo);
        tmpString += " ratio "+ofToString(currentSpeedRatio);
        ofSetColor(155,155,155);
        ofDrawBitmapString(tmpString, 20, bayesTempoWindow.y+10);
    }
}

void AudioEventMatcher::setScreenDisplayTimes(){
    screenWidthMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.amplitudeNumber);
    // if (!followingLiveInput){
    screenStartTimeMillis = recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.framesToMillis(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.drawParams.windowStartFrame);
    screenEndTimeMillis = screenStartTimeMillis + screenWidthMillis;
    //need PRECISION in this alignment
    /*} else {
        screenStartTimeMillis = (int)(currentAlignmentPosition/screenWidthMillis) * screenWidthMillis;
        screenEndTimeMillis = screenStartTimeMillis + screenWidthMillis;
    }*/
}

void AudioEventMatcher::drawBayesianDistributions(){
    drawPositionWindow();
    // bayesianStruct.likelihood.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesLikelihoodWindow);

    bayesianStruct.relativeSpeedPosterior.drawConstrainedVector(0, bayesianStruct.relativeSpeedPosterior.arraySize, 0, ofGetWidth(), bayesTempoWindow);

    if (drawLikelihoods)
        drawTrackLikelihoods();

    // int priorStartIndex = bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis);
    // int priorEndIndex = bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis);
    // ofSetColor(0,200,200); //recent prior
    // recentPrior.drawConstrainedVector(priorStartIndex, priorEndIndex, 0, ofGetWidth(), bayesPositionWindow);

    if (printInfo)
        drawInfo();
}
void AudioEventMatcher::drawPositionWindow(){
    int startIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenStartTimeMillis);
    int endIndex = bayesianStruct.posterior.getRealTermsAsIndex(screenEndTimeMillis);
    string tmpString = "start "+ofToString(screenStartTimeMillis)+" (index "+ofToString(startIndex)+"), end "+ofToString(screenEndTimeMillis);
    ofDrawBitmapString(tmpString, bayesPositionWindow.x+20, bayesPositionWindow.y+20);

    //draw the posterior in the bayes position window
    ofSetColor(255,0,255);
    bayesianStruct.posterior.drawConstrainedVector(startIndex, endIndex, 0, ofGetWidth(), bayesPositionWindow);

    //green scrolling line at the current best estimate
    ofSetColor(0,255,0);
    double currentEstimateIndex = (currentAlignmentPosition - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
    ofLine(currentEstimateIndex, bayesPositionWindow.y, currentEstimateIndex, bayesPositionWindow.y + bayesPositionWindow.height);

    ofSetColor(0,255,255); //synchroniser position
    currentEstimateIndex = (synchroniser.playingPositionMillis - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
    ofLine(currentEstimateIndex, bayesLikelihoodWindow.y, currentEstimateIndex, bayesLikelihoodWindow.y + bayesPositionWindow.height);

    ofSetColor(255,0,100); //purple prior
    bayesianStruct.prior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow);

    ofSetColor(255,0,0); //projected prior in red
    projectedPrior.drawConstrainedVector(bayesianStruct.prior.getRealTermsAsIndex(screenStartTimeMillis), bayesianStruct.prior.getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), bayesPositionWindow);

    //draw pitch
    ofSetColor(0,100,255);
    int index = getScreenWidthIndexOfEventTime(recentPitchEventTime);
    //this window would be used (recordedTracks.loadedAudioFiles[1].fileLoader.onsetDetect.window);

    ofSetColor(255, 255, 255);
    ofDrawBitmapString("curr.speed "+ofToString(synchroniser.smoothedSpeedOutput, 3), 20, ofGetHeight() - 10);
}

int AudioEventMatcher::getScreenWidthIndexOfEventTime(const double& time){
    return (time - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
}

void AudioEventMatcher::drawTrackLikelihoods(){
    //draw the track-by-track likelihoods
    for (int i = 0; i < recordedTracks.numberOfAudioTracks; i++){
        ofSetColor(200,255,50); //channel likelihoods in yellow
        likelihoodVisualisation[i].drawConstrainedVector(likelihoodVisualisation[i].getRealTermsAsIndex(screenStartTimeMillis), likelihoodVisualisation[i].getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window);

        ofSetColor(0,255,150); //channel priors
        recentPriors[i].drawConstrainedVector(recentPriors[i].getRealTermsAsIndex(screenStartTimeMillis), recentPriors[i].getRealTermsAsIndex(screenEndTimeMillis), 0, ofGetWidth(), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window);

        if (printInfo){
            ofSetColor(255);
            ofDrawBitmapString("recent event "+ofToString(recentEventTime[i]), recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.x + 20, recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.y + recordedTracks.loadedAudioFiles[i].fileLoader.onsetDetect.window.height - 10);
        }
    }
}

void AudioEventMatcher::drawInfo(){
    string tmpStr = "zero is "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(0));
    tmpStr += " offset is "+ofToString(bayesianStruct.posterior.offset);
    tmpStr += " screenWidth = "+ofToString(bayesianStruct.posterior.getRealTermsAsIndex(screenWidthMillis));
    ofDrawBitmapString(tmpStr, 20, 140);

    tmpStr = "best est "+ofToString(bayesianStruct.bestEstimate);
    ofDrawBitmapString(tmpStr, 20, 180);
    //ofDrawBitmapString("screenwidth "+ofToString(screenWidthMillis), 20, 800);

    ofSetColor(255);
    tmpStr = "pitch "+ofToString(recentPitch, 2);
    tmpStr += " nearest "+ofToString(pitchOfNearestMatch, 2);
    tmpStr += " dist "+ofToString(distanceOfNearestMatch, 2);
    tmpStr += ", time "+ofToString(recentPitchEventTime, 0);
    ofDrawBitmapString(tmpStr, 20, 20);

    string alignString = "align "+ofToString(currentAlignmentPosition, 2); //same as synchroniser recorded position
    alignString += " playing "+ofToString(synchroniser.playingPositionRatio, 5);
    alignString += " pos "+ofToString(synchroniser.playingPositionMillis, 0)+" ms";     //playing position in the file - causal correction
    alignString += " rec pos "+ofToString(synchroniser.recordedPositionMillis, 0)+" ms"; //currentAlignmentPosition in the rehearsal
    alignString += " playing time "+ofToString(synchroniser.recordedPositionTimeSent, 0)+" ms"; //playing time since the beginning of the live take
    ofDrawBitmapString(alignString, 20, 50);

    ofDrawBitmapString("pos "+ofToString(recordedTracks.loadedAudioFiles[0].fileLoader.onsetDetect.playPosition), 200, 600);
}
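// All of the drawing code maps times to pixels with the same linear transform
// as getScreenWidthIndexOfEventTime(); mousePressed() applies the inverse to
// turn a click back into a time. Both directions as a sketch, with
// illustrative parameter names:
static double millisToScreenX(double timeMillis, double screenStartMillis,
                              double screenWidthMillis, double screenWidthPixels){
    return (timeMillis - screenStartMillis) * screenWidthPixels / screenWidthMillis;
}

static double screenXToMillis(double x, double screenStartMillis,
                              double screenWidthMillis, double screenWidthPixels){
    return screenStartMillis + x * screenWidthMillis / screenWidthPixels;
}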
void AudioEventMatcher::drawAlignmentTimes(){
    ofSetColor(255);
    std::string dataString = "Live time "+ofToString(synchroniser.recordedPositionTimeSent);
    dataString += ", Reh time "+ofToString(synchroniser.recordedPositionMillis);
    ofDrawBitmapString(dataString, 10, ofGetHeight() - 40);
}

void AudioEventMatcher::drawMarkers(){
    int i = 0;
    while (i < markedPoints.markers.size() && markedPoints.markers[i] < screenStartTimeMillis)
        i++;

    while (i < markedPoints.markers.size() && markedPoints.markers[i] < screenStartTimeMillis + screenWidthMillis) {
        ofSetColor(255,255,60);
        double markerPosition = (markedPoints.markers[i] - screenStartTimeMillis)*ofGetWidth()/screenWidthMillis;
        ofLine(markerPosition, bayesPositionWindow.y, markerPosition, bayesPositionWindow.y + bayesPositionWindow.height);
        //printf("marker %f pos %f\n", markedPoints.markers[i], markerPosition);
        i++;
        ofDrawBitmapString(ofToString(i), markerPosition, bayesPositionWindow.y);
    }
}

#pragma mark -newEvents

void AudioEventMatcher::newPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
    if (pitchIn > 0){
        liveInput.addPitchEvent(pitchIn, timeIn);
        //printPosteriorMAPinfo();
        matchNewPitchEvent(channel, pitchIn, timeIn); //main pitch matching fn
        likelihoodVisualisation[1] = bayesianStruct.likelihood;
        recentPitch = pitchIn; //for drawing
        recentPitchEventTime = timeIn;
    }
    checkTempo();
}

void AudioEventMatcher::newChromaEvent(const int& channel, float* chromaIn, const double& timeIn){
    //could add the event to the liveInput list, as with pitch events
    if (printingData){
        printf("match chroma channel %i\n", channel);
        for (int i = 0; i < 12; i++){
            printf("chroma in[%i] = %f\n", i, chromaIn[i]);
        }
    }
    matchNewChromaEvent(channel, chromaIn, timeIn); //main chroma matching fn
    likelihoodVisualisation[channel] = bayesianStruct.likelihood;
    checkTempo();
}

void AudioEventMatcher::newKickEvent(const double& timeIn){
    // liveInput.addKickEvent(timeIn);
    newKickEvent(0, timeIn);
    // matchNewOnsetEvent(0, timeIn);
    // likelihoodVisualisation[0] = bayesianStruct.likelihood;
}

void AudioEventMatcher::newKickEvent(const int& channel, const double& timeIn){
    // liveInput.addKickEvent(timeIn);
    matchNewOnsetEvent(channel, timeIn);
    likelihoodVisualisation[0] = bayesianStruct.likelihood;
    checkTempo();
}

void AudioEventMatcher::newSnareEvent(const double& timeIn){
    newSnareEvent(2, timeIn);
    // matchNewOnsetEvent(2, timeIn);
    // likelihoodVisualisation[2] = bayesianStruct.likelihood;
}

void AudioEventMatcher::newSnareEvent(const int& channel, const double& timeIn){
    matchNewOnsetEvent(channel, timeIn);
    likelihoodVisualisation[2] = bayesianStruct.likelihood;
    checkTempo();
}

void AudioEventMatcher::checkTempo(){
    if (synchroniser.speed > 0.92 && synchroniser.speed < 1.08){
        // relativeTempo += 0.4 * (synchroniser.speed - relativeTempo);
        printf("Speed %f new tempo %f\n\n", synchroniser.speed, synchroniser.smoothedSpeedOutput);
        setSpeedRatioDistribution(synchroniser.smoothedSpeedOutput);
    }
}
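// checkTempo() above only re-centres the relative-speed distribution while
// the synchroniser's speed sits within ±8% of the recording (0.92-1.08), so
// an implausible estimate cannot drag the alignment. A sketch of that
// gating (illustrative names, not the Synchroniser API):
static double gatedSpeedRatio(double previousRatio, double measuredSpeed){
    if (measuredSpeed > 0.92 && measuredSpeed < 1.08)
        return measuredSpeed; //plausible: accept the new estimate
    return previousRatio;     //otherwise keep the last accepted ratio
}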
#pragma mark -EventMatching

//needs only to set bounds for the matching process, rather than take timeIn
void AudioEventMatcher::matchNewOnsetEvent(const int& channel, const double& timeIn){
    //moves the posterior up into the prior given the time interval and calculates new offsets
    bayesianStruct.updateBayesianDistributions(timeIn);

    //start at the beginning but OPTIMISE later
    bayesianStruct.likelihood.offset = bayesianStruct.prior.offset;
    bayesianStruct.likelihood.zero(); //set to zero

    //double quantity = 1;
    double quantity = 0;
    switch (channel) {
        case 0:
            quantity = kickLikelihoodToNoise; // onsetLikelihoodToNoise; //BETTER CHANGE THIS BACK TOO - see below
            break;
        case 2:
            quantity = snareLikelihoodToNoise; // onsetLikelihoodToNoise;
            break;
    }

    int numberOfMatchesFound = 0;
    double nearestOnsetDistance = 1000;

    double startMatchingTime = bayesianStruct.likelihood.offset;
    double endMatchingTime = bayesianStruct.likelihood.offset + matchWindowWidth;

    double millisTime = -1*INFINITY; //or 0 is fine
    int checkIndex = 0;

    if (channel <= recordedTracks.numberOfAudioTracks && checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()){
        //skip quickly to the start of the matching window
        while (checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()
            && millisTime < startMatchingTime) {
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[checkIndex].millisTime;
            checkIndex++;
        }

        for (int i = checkIndex; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size() && millisTime <= endMatchingTime; i++){
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (millisTime >= startMatchingTime && millisTime <= endMatchingTime){
                bayesianStruct.likelihood.addGaussianShapeFromRealTime(millisTime, onsetLikelihoodWidth, quantity);
                numberOfMatchesFound++;
                // printf("Adding Gaussian for onset at time %f offset %f\n", millisTime, bayesianStruct.likelihood.offset);
                if (fabs(currentAlignmentPosition - millisTime) < nearestOnsetDistance)
                    nearestOnsetDistance = currentAlignmentPosition - millisTime;
            }//end if within limits
        }
    }

    if (numberOfMatchesFound > 0){
        // bayesianStruct.likelihood.addConstant((1-likelihoodToNoiseRatio)/bayesianStruct.likelihood.length);
        // bayesianStruct.likelihood.addConstant(numberOfMatchesFound*(1-onsetLikelihoodToNoise)/(onsetLikelihoodToNoise*bayesianStruct.likelihood.length));
        bayesianStruct.likelihood.addConstant(numberOfMatchesFound*(1-quantity)/(bayesianStruct.likelihood.length)); //BETTER CHANGE THIS BACK...
        bayesianStruct.likelihood.renormalise();
        bayesianStruct.calculatePosterior();
        lastAlignmentTime = timeIn; //use the TIMESTAMP
        recentEventTime[channel] = timeIn; //ofGetElapsedTimeMillis() - startTime;
        recentPriors[channel] = bayesianStruct.prior;
        projectedPrior = bayesianStruct.prior;
        temporal.updateTempo(channel, timeIn);
    }

    int timeNow = getTimeNow();
    printf("Nearest onset is %.1f, time is %i, alignment %i, time now %i\n", nearestOnsetDistance, (int) timeIn, (int) currentAlignmentPosition, timeNow);
}
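// The likelihood built above is a mixture: one Gaussian bump (width
// onsetLikelihoodWidth ms, weight set by the channel's likelihood-to-noise
// ratio) per recorded onset inside the 8000 ms match window, plus a uniform
// "noise floor" so that a spurious live event cannot zero the posterior.
// A minimal sketch of that construction, assuming the likelihood is a plain
// array of window bins and matchedOnsetsMs holds only the in-window onsets:
#include <vector>
#include <cmath>

static std::vector<double> onsetLikelihoodSketch(const std::vector<double>& matchedOnsetsMs,
                                                 double windowStartMs, double binMs, int numBins,
                                                 double gaussWidthMs, double likelihoodToNoise){
    std::vector<double> like(numBins, 0.0);
    //one Gaussian bump per matched recorded onset
    for (size_t n = 0; n < matchedOnsetsMs.size(); n++){
        for (int i = 0; i < numBins; i++){
            double t = windowStartMs + i * binMs;
            double d = (t - matchedOnsetsMs[n]) / gaussWidthMs;
            like[i] += likelihoodToNoise * std::exp(-0.5 * d * d);
        }
    }
    //uniform noise floor, scaled as in matchNewOnsetEvent above:
    //numberOfMatches * (1 - likelihoodToNoise) / length
    double noiseFloor = matchedOnsetsMs.size() * (1.0 - likelihoodToNoise) / numBins;
    for (int i = 0; i < numBins; i++)
        like[i] += noiseFloor;
    return like;
}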
void AudioEventMatcher::matchNewPitchEvent(const int& channel, const double& pitchIn, const double& timeIn){
    //start at the beginning but OPTIMISE later

    /*printf("TIME %i\n", ofGetElapsedTimeMillis()); //tmp debug
    updateBestAlignmentPosition();
    printf("current alignment best estimate %f\n", currentAlignmentPosition);
    */

    //moves the posterior up into the prior given the time interval and calculates new offsets
    bayesianStruct.updateBayesianDistributions(timeIn);

    //set the likelihoods by matching the pitched note
    int numberOfMatches = 0;
    bayesianStruct.likelihood.zero(); //set to zero

    double newOnsetTime;
    double closestDistance = INFINITY;
    double quantity = 0;
    double totalLikelihoodAdded = 0;

    if (channel <= recordedTracks.numberOfAudioTracks){
        for (int i = 0; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size(); i++){
            if (checkMatch(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn)) {
                quantity = getPitchDistance(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch, pitchIn, pitchWidth);
                bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, pitchLikelihoodWidth, quantity);
                recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = true;
                numberOfMatches++;
                totalLikelihoodAdded += quantity;
            } else {
                recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].matched = false;
            }

            //checking the nearest pitch
            newOnsetTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (abs(newOnsetTime - currentAlignmentPosition) < closestDistance){
                closestDistance = abs(newOnsetTime - currentAlignmentPosition);
                pitchOfNearestMatch = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].aubioPitch;
                distanceOfNearestMatch = quantity;
            }
        }
    }

    if (numberOfMatches > 0 && totalLikelihoodAdded > 0){ //no point updating unless there is a match
        //replacing numberOfMatches with totalLikelihoodAdded below:
        //the bug here was that if totalLikelihoodAdded == 0 we would add an all-zero likelihood
        bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-pitchLikelihoodToNoise)/(bayesianStruct.likelihood.length));
        // bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-pitchLikelihoodToNoise)/(pitchLikelihoodToNoise*bayesianStruct.likelihood.length));

        //tmp: set the likelihood constant and calculate using that
        //bayesianStruct.likelihood.zero();
        //bayesianStruct.likelihood.addConstant(1);

        bayesianStruct.calculatePosterior();
        lastAlignmentTime = timeIn; //has to use the STAMPED time
        recentEventTime[channel] = timeIn;
        recentPriors[channel] = bayesianStruct.prior;
        projectedPrior = bayesianStruct.prior;
        temporal.eventTimes[channel].push_back(timeIn);
    }
}
double AudioEventMatcher::getPitchDistance(const double& pitchOne, const double& pitchTwo, const double& scale){
    double scaleFactor = scale * pitchOne / 110.0;

    int multiplicationFactor = 1;
    if (pitchTwo > 0){
        multiplicationFactor = round(pitchOne/pitchTwo);
    }

    double distance = abs(pitchOne - pitchTwo*multiplicationFactor);
    if (distance < scaleFactor)
        distance = 1 - (distance/scaleFactor);
    else
        distance = 0;

    //printf("[pitch distance %f vs %f, factor %i = %f\n", pitchOne, pitchTwo, multiplicationFactor, distance);
    return distance;
}

bool AudioEventMatcher::checkMatch(const double& recordedPitch, const double& livePitch){
    if (livePitch > 0){
        int multiplicationFactor = (int)(round(recordedPitch/livePitch));
        return (abs(recordedPitch - livePitch * multiplicationFactor) < pitchCutOff);
    } else {
        return false;
    }
}
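// Both functions above fold octaves before comparing: the live pitch is
// multiplied by round(recordedPitch/livePitch), so a live 110 Hz note can
// match a recorded 220 Hz one. Worked example with pitchCutOff = 16 and
// pitchWidth = 12 as defined at the top of the file:
//
//   recordedPitch = 220, livePitch = 110
//   multiplicationFactor = round(220/110) = 2
//   |220 - 110*2| = 0 < 16           -> checkMatch() returns true
//   scaleFactor   = 12 * 220/110.0 = 24
//   distance      = 1 - 0/24 = 1.0   -> full-weight Gaussian added
//
//   recordedPitch = 220, livePitch = 104
//   |220 - 104*2| = 12 < 16          -> still a match
//   distance      = 1 - 12/24 = 0.5  -> half-weight Gaussian added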
void AudioEventMatcher::matchNewChromaEvent(const int& channel, float* chromaIn, const double& timeIn){
    //start at the beginning but OPTIMISE later
    makeQuantisedChroma(chromaIn);

    //moves the posterior up into the prior given the time interval and calculates new offsets
    bayesianStruct.updateBayesianDistributions(timeIn);

    //set the likelihoods by matching the chroma frame
    int numberOfMatches = 0;
    bayesianStruct.likelihood.zero(); //set to zero

    double newOnsetTime;
    double closestDistance = INFINITY;
    double quantity = 1;
    double totalLikelihoodAdded = 0;

    double startMatchingTime = bayesianStruct.likelihood.offset;
    double endMatchingTime = bayesianStruct.likelihood.offset + matchWindowWidth;

    double millisTime = -1*INFINITY; //or 0 is fine
    int checkIndex = 0;

    if (channel <= recordedTracks.numberOfAudioTracks && checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()){
        //skip quickly to where we need to check from
        while (checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()
            && millisTime < startMatchingTime) {
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[checkIndex].millisTime;
            checkIndex++;
        }

        for (int i = checkIndex; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size() && millisTime <= endMatchingTime; i++){
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (millisTime >= startMatchingTime && millisTime <= endMatchingTime){
                //for continuous chroma:
                /*
                if (useChromaDotProduct)
                    quantity = getChromaDotProductDistance(chromaIn, &recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].chromaValues[0]);
                else
                    quantity = getChromaEuclideanDistance(chromaIn, &recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].chromaValues[0]);
                */
                // printf("Distance old way %f", quantity);

                //for quantised chroma
                quantity = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].getChromaQuantisedDistance(&quantisedChromagramReceived[0]);
                // printf(" quantised %f\n", quantity);

                bayesianStruct.likelihood.addGaussianShapeFromRealTime(recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime, chromaLikelihoodWidth, quantity);
                numberOfMatches++;
                totalLikelihoodAdded += quantity;
                //printf("Adding CHROMA Gaussian for onset at time %.1f dist %.3f\n", millisTime, quantity);
            }//end if within limits
        }
    }

    if (numberOfMatches > 0 && totalLikelihoodAdded > 0){ //no point updating unless there is a match
        printf("CHROMA HAS %i MATCHES\n", numberOfMatches);
        //replacing numberOfMatches with totalLikelihoodAdded below
        bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-chromaLikelihoodToNoise)/(bayesianStruct.likelihood.length));
        //previous way:
        // bayesianStruct.likelihood.addConstant(totalLikelihoodAdded*(1-chromaLikelihoodToNoise)/(chromaLikelihoodToNoise*bayesianStruct.likelihood.length));

        bayesianStruct.calculatePosterior();
        lastAlignmentTime = timeIn; //has to use the STAMPED time
        recentEventTime[channel] = timeIn;
        recentPriors[channel] = bayesianStruct.prior;
        projectedPrior = bayesianStruct.prior;
        temporal.eventTimes[channel].push_back(timeIn);
    }
}

void AudioEventMatcher::makeQuantisedChroma(float* chromaIn){
    double L_norm = 0;
    for (int i = 0; i < 12; i++){
        L_norm += chromaIn[i];
    }
    if (L_norm > 0){
        for (int i = 0; i < 12; i++){
            chromaIn[i] /= L_norm; //NB not a const fn: normalises in place
            quantisedChromagramReceived[i] = 0;
            if (chromaIn[i] > 0.05) quantisedChromagramReceived[i]++;
            if (chromaIn[i] > 0.1)  quantisedChromagramReceived[i]++;
            if (chromaIn[i] > 0.2)  quantisedChromagramReceived[i]++;
            if (chromaIn[i] > 0.4)  quantisedChromagramReceived[i]++;
        }
    }
}

double AudioEventMatcher::getChromaDotProductDistance(float* chromaOne, float* chromaTwo){
    double distance = 0;
    double total = 0;
    for (int i = 0; i < 12; i++){
        distance += chromaOne[i]*chromaTwo[i];
        total += chromaOne[i]*chromaOne[i] + (chromaTwo[i]*chromaTwo[i]);
    }
    if (total > 0)
        distance /= sqrt(total);
    return distance;
}
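// makeQuantisedChroma() above L1-normalises the 12-bin chroma vector (so the
// bins sum to 1) and then maps each bin onto a 0-4 scale via fixed thresholds
// at 0.05, 0.1, 0.2 and 0.4. A standalone sketch of the same quantiser:
static void quantiseChromaSketch(const float chromaIn[12], int quantisedOut[12]){
    //thresholds on the normalised bin energy; a bin holding more than 40%
    //of the frame's total energy gets the top level, 4
    const float thresholds[4] = {0.05f, 0.1f, 0.2f, 0.4f};

    float total = 0;
    for (int i = 0; i < 12; i++) total += chromaIn[i];

    for (int i = 0; i < 12; i++){
        float normalised = (total > 0) ? chromaIn[i] / total : 0;
        quantisedOut[i] = 0;
        for (int level = 0; level < 4; level++){
            if (normalised > thresholds[level]) quantisedOut[i]++;
        }
    }
}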
double AudioEventMatcher::getChromaEuclideanDistance(float* chromaOne, float* chromaTwo){
    double distance = 0;
    double total = 0;
    for (int i = 0; i < 12; i++){
        total += (chromaOne[i] - chromaTwo[i])*(chromaOne[i] - chromaTwo[i]);
        // printf("chroma1: %.2f; chroma2: %.2f\n", chromaOne[i], chromaTwo[i]);
        // total += chromaOne[i]*chromaOne[i] + (chromaTwo[i]*chromaTwo[i]);
    }

    if (total > euclideanMaximumDistance)
        euclideanMaximumDistance = total;

    //normalise against the largest squared distance seen so far, i.e. 1 is a perfect match
    distance = (euclideanMaximumDistance - total) / euclideanMaximumDistance;
    // if (total > 0)
    //     distance = 1.0/sqrt(total);
    // printf("DISTANCE : %.3f\n", distance);
    return distance;
}

void AudioEventMatcher::windowResized(const int& w, const int& h){
    recordedTracks.windowResized(w, h);
    bayesTempoWindow.resized(w, h);
    bayesPositionWindow.resized(w, h);
    bayesLikelihoodWindow.resized(w, h);
}

void AudioEventMatcher::writeAllDistributions(){
    std::string filepath = "../../../data/distributions/priorDistbnOutput.txt";
    writeDistribution(bayesianStruct.prior, filepath);

    filepath = "../../../data/distributions/likelihoodDistbnOutput.txt";
    writeDistribution(bayesianStruct.likelihood, filepath);

    filepath = "../../../data/distributions/posteriorDistbnOutput.txt";
    writeDistribution(bayesianStruct.posterior, filepath);

    //write kick events
    filepath = "../../../data/distributions/kickEvents.txt";
    writeKickEvent(screenStartTimeMillis, screenEndTimeMillis, filepath);

    ofBackground(255);
    recordedTracks.drawTracks();
    filepath = "../../../data/distributions/screenGraphics.png";
    img.grabScreen(0, 0, ofGetWidth(), ofGetHeight());
    img.saveImage(filepath);
    ofBackground(0);
}

void AudioEventMatcher::writeDistribution(DynamicVector& distribution, std::string filename){
    testDistributionOutput.openFile(filename);

    int minIndex = distribution.getRealTermsAsIndex(screenStartTimeMillis);
    int maxIndex = distribution.getRealTermsAsIndex(screenEndTimeMillis);
    int minScreenIndex = 0;
    int maxScreenIndex = ofGetWidth();
    double stepSize = (maxScreenIndex - minScreenIndex) / (double)(maxIndex - minIndex); //step size in pixels per array bin
    double screenHeight = ofGetHeight();
    double maxVal = distribution.getMaximum();

    //OPTIMISE: could just add stepSize each time rather than adding minIndex each time
    int i = max(1, minIndex+1);
    // ofDrawBitmapString("i = "+ofToString(i)+" :: screen min: "+ofToString(minScreenIndex + stepSize*(i-minIndex-1)), 20, 640);
    while ((minScreenIndex + stepSize*(i-minIndex)) < 0)
        i++; //only write what is on the screen

    for ( ; i < min(maxIndex+1, (int)distribution.array.size()); i++){
        //as array indices:
        // testDistributionOutput.writeValue(minScreenIndex + (stepSize*(i-minIndex-1)), distribution.array[i-1]);
        //as millis:
        testDistributionOutput.writeValue(distribution.getIndexInRealTerms(i), distribution.array[i-1]);
    }

    testDistributionOutput.closeFile();
}
void AudioEventMatcher::writeKickEvent(const int& startMatchingTime, const int& endMatchingTime, std::string filepath){
    int channel = 0; //i.e. kick

    if (testDistributionOutput.outputFile.is_open())
        testDistributionOutput.closeFile();
    testDistributionOutput.outputFile.open(filepath.c_str());

    testDistributionOutput.outputFile << startMatchingTime << "\t" << 0 << endl;

    int checkIndex = 0;
    double millisTime = 0;
    if (channel <= recordedTracks.numberOfAudioTracks && checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()){
        while (checkIndex < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size()
            && millisTime < startMatchingTime) {
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[checkIndex].millisTime;
            checkIndex++;
        }

        for (int i = checkIndex; i < recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets.size() && millisTime <= endMatchingTime; i++){
            millisTime = recordedTracks.loadedAudioFiles[channel].fileLoader.onsetDetect.chromaOnsets[i].millisTime;
            if (millisTime >= startMatchingTime && millisTime <= endMatchingTime){
                //write each onset as a one-millisecond-wide unit impulse
                testDistributionOutput.outputFile << (int) millisTime - 1 << "\t" << 0 << endl;
                testDistributionOutput.outputFile << (int) millisTime << "\t" << 1 << endl;
                testDistributionOutput.outputFile << (int) millisTime + 1 << "\t" << 0 << endl;
            }//end if within limits
        }
    }

    testDistributionOutput.outputFile << endMatchingTime << "\t" << 0 << endl;
    testDistributionOutput.closeFile();
}

/*
void printPosteriorMAPinfo(){
    //tmp print stuff
    printf("New pitch MAP post estimate now %i, ", bayesianStruct.posterior.MAPestimate);
    double tmp = bayesianStruct.posterior.getMAPestimate();
    printf(" getting it %f and offset %f == %f ms\n", tmp, bayesianStruct.posterior.offset, bayesianStruct.posterior.getIndexInRealTerms(tmp));
}
*/
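// The files written above are two-column, tab-separated (x<TAB>y) series,
// suitable for line-plotting tools such as gnuplot: writeDistribution()
// dumps (milliseconds, probability) pairs, and writeKickEvent() writes each
// onset as a 1 ms-wide unit impulse bracketed by zeros. For example, a kick
// at 1500 ms inside a [0, 8000] ms window produces:
//
//   0     0
//   1499  0
//   1500  1
//   1501  0
//   8000  0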