changeset 161:246c203cc733

Add wavelet-based pitch tracker
author Jamie Bullock <jamie@jamiebullock.com>
date Fri, 31 May 2013 22:44:03 +0100
parents f7d0b6607193
children 39d168ee5917
files examples/puredata/simple-test.pd examples/puredata/xtract~.c src/Makefile.am src/descriptors.c src/dywapitchtrack/dywapitchtrack.c src/dywapitchtrack/dywapitchtrack.h src/init.c src/libxtract.c src/scalar.c src/xtract_globals_private.h xtract/libxtract.h xtract/xtract_scalar.h
diffstat 12 files changed, 636 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/examples/puredata/simple-test.pd	Fri May 31 22:43:17 2013 +0100
+++ b/examples/puredata/simple-test.pd	Fri May 31 22:44:03 2013 +0100
@@ -1,11 +1,8 @@
 #N canvas 642 248 670 289 10;
 #X obj 192 63 osc~ 440;
 #X floatatom 194 187 10 0 0 0 - - -;
-#X obj 194 150 xtract~ spectral_mean;
 #X obj 193 94 *~ 0.3;
-#X text 337 121 Optional second argument gives blocksize;
-#X obj 193 121 xtract~ spectrum 64;
-#X connect 0 0 3 0;
-#X connect 2 0 1 0;
-#X connect 3 0 5 0;
-#X connect 5 0 2 0;
+#X obj 193 137 xtract~ f0;
+#X connect 0 0 2 0;
+#X connect 2 0 3 0;
+#X connect 3 0 1 0;
--- a/examples/puredata/xtract~.c	Fri May 31 22:43:17 2013 +0100
+++ b/examples/puredata/xtract~.c	Fri May 31 22:44:03 2013 +0100
@@ -72,7 +72,7 @@
     t_int rv = 0;
     double result = 0.0;
 
-    for(n = 0; n < N; ++n) {
+    for(t_int n = 0; n < N; ++n) {
         x->data[n] = (double)in[n];
     }
 
@@ -148,7 +148,7 @@
     t_int n, N, M, f, F, 
           n_args, 
           type;
-    t_float *argv_max;
+    double *argv_max;
     t_symbol *arg1;
     xtract_function_descriptor_t *fd;
     char *p_name, 
@@ -259,7 +259,7 @@
     if(x->is_subframe)
         N = M;
 
-    post("xtract~: window size: %d", N);
+    post("xtract~: assumed window size: %d", N);
 
     /* do init if needed */
     if(x->feature == XTRACT_MFCC){
@@ -271,9 +271,9 @@
         post("xtract~: mfcc: filters = %d", 
 		((xtract_mel_filter *)x->argv)->n_filters);
         mf->filters = 
-            (t_float **)getbytes(mf->n_filters * sizeof(t_float *));
+            (double **)getbytes(mf->n_filters * sizeof(double *));
         for(n = 0; n < mf->n_filters; n++)
-            mf->filters[n] = (float *)getbytes(N * sizeof(float));
+            mf->filters[n] = (double *)getbytes(N * sizeof(double));
                  
         xtract_init_mfcc(N, NYQUIST, XTRACT_EQUAL_GAIN, 80.0f,
                 18000.0f, mf->n_filters, mf->filters);
@@ -288,6 +288,9 @@
         x->argv = x->window;
         x->done_init = 1;
     }
+    else if(x->feature == XTRACT_WAVELET_F0){
+        xtract_init_wavelet_f0_state();
+    }
 
     /* Initialise fft_plan if required */
     if(x->feature == XTRACT_AUTOCORRELATION_FFT ||
--- a/src/Makefile.am	Fri May 31 22:43:17 2013 +0100
+++ b/src/Makefile.am	Fri May 31 22:44:03 2013 +0100
@@ -16,6 +16,7 @@
 	  window.c \
 	  fini.c \
 	  helper.c \
+	  dywapitchtrack/dywapitchtrack.c \
 	  $(OOURA)
 
 lib_LTLIBRARIES = libxtract.la
--- a/src/descriptors.c	Fri May 31 22:43:17 2013 +0100
+++ b/src/descriptors.c	Fri May 31 22:44:03 2013 +0100
@@ -87,6 +87,7 @@
             break;
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
             *argv_min = XTRACT_SR_LOWER_LIMIT;
             *argv_max = XTRACT_SR_UPPER_LIMIT;
             *argv_def = XTRACT_SR_DEFAULT;
@@ -228,6 +229,7 @@
         case XTRACT_LOWEST_VALUE:
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
             *argv_donor = XTRACT_ANY;
             break;
         case XTRACT_MFCC:
@@ -346,6 +348,7 @@
             break;
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
         case XTRACT_SPECTRUM:
         case XTRACT_AUTOCORRELATION:
         case XTRACT_AUTOCORRELATION_FFT:
@@ -413,6 +416,7 @@
         case XTRACT_LNORM:
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
         case XTRACT_MFCC:
         case XTRACT_AUTOCORRELATION:
         case XTRACT_AUTOCORRELATION_FFT:
@@ -608,6 +612,14 @@
                    "Extract the fundamental frequency of an audio signal (failsafe)");
             strcpy(author, "Jamie Bullock");
             break;
+        case XTRACT_WAVELET_F0:
+            strcpy(name, "wavelet_f0");
+            strcpy(p_name, "Fundamental Frequency (wavelet method)");
+            strcpy(desc, "Extract the fundamental frequency of a signal (wavelet method)");
+            strcpy(p_desc,
+                   "Extract the fundamental frequency of an audio signal (wavelet method)");
+            strcpy(author, "Antoine Schmitt");
+            break;
         case XTRACT_TONALITY:
             strcpy(name, "tonality");
             strcpy(p_name, "Tonality");
@@ -980,6 +992,7 @@
         case XTRACT_LOWEST_VALUE:
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
         case XTRACT_FLATNESS_DB:
         case XTRACT_TONALITY:
             *argc = 1;
@@ -1103,6 +1116,7 @@
         case XTRACT_HPS:
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
         case XTRACT_FLUX:
         case XTRACT_LNORM:
         case XTRACT_NONZERO_COUNT:
@@ -1180,6 +1194,7 @@
         case XTRACT_HPS:
         case XTRACT_F0:
         case XTRACT_FAILSAFE_F0:
+        case XTRACT_WAVELET_F0:
         case XTRACT_NONZERO_COUNT:
         case XTRACT_AUTOCORRELATION:
         case XTRACT_AMDF:
@@ -1248,6 +1263,7 @@
             case XTRACT_SPREAD:
             case XTRACT_F0:
             case XTRACT_FAILSAFE_F0:
+            case XTRACT_WAVELET_F0:
             case XTRACT_HPS:
             case XTRACT_ROLLOFF:
                 *result_unit = XTRACT_HERTZ;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dywapitchtrack/dywapitchtrack.c	Fri May 31 22:44:03 2013 +0100
@@ -0,0 +1,448 @@
+/* dywapitchtrack.c
+ 
+ Dynamic Wavelet Algorithm Pitch Tracking library
+ Released under the MIT open source licence
+  
+ Copyright (c) 2010 Antoine Schmitt
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include "dywapitchtrack.h"
+#include <math.h>
+#include <stdlib.h>
+#include <string.h> // for memset
+
+
+//**********************
+//       Utils
+//**********************
+
+#ifndef max
+#define max(x, y) ((x) > (y)) ? (x) : (y)
+#endif
+#ifndef min
+#define min(x, y) ((x) < (y)) ? (x) : (y)
+#endif
+
+// returns 1 if power of 2
+int _power2p(int value) {
+	if (value == 0) return 1;
+	if (value == 2) return 1;
+	if (value & 0x1) return 0;
+	return (_power2p(value >> 1));
+}
+
+// count number of bits
+int _bitcount(int value) {
+	if (value == 0) return 0;
+	if (value == 1) return 1;
+	if (value == 2) return 2;
+	return _bitcount(value >> 1) + 1;
+}
+
+// closest power of 2 above or equal
+int _ceil_power2(int value) {
+	if (_power2p(value)) return value;
+	
+	if (value == 1) return 2;
+	int j, i = _bitcount(value);
+	int res = 1;
+	for (j = 0; j < i; j++) res <<= 1;
+	return res;
+}
+
+// closest power of 2 below or equal
+int _floor_power2(int value) {
+	if (_power2p(value)) return value;
+	return _ceil_power2(value)/2;
+}
+
+// abs value
+int _iabs(int x) {
+	if (x >= 0) return x;
+	return -x;
+}
+
+// 2 power
+int _2power(int i) {
+	int res = 1, j;
+	for (j = 0; j < i; j++) res <<= 1;
+	return res;
+}
+
+//******************************
+// the Wavelet algorithm itself
+//******************************
+
+int dywapitch_neededsamplecount(int minFreq) {
+	int nbSam = 3*44100/minFreq; // 1017. for 130 Hz
+	nbSam = _ceil_power2(nbSam); // 1024
+	return nbSam;
+}
+
+typedef struct _minmax {
+	int index;
+	struct _minmax *next;
+} minmax;
+
+double _dywapitch_computeWaveletPitch(double * samples, int startsample, int samplecount) {
+	double pitchF = 0.0;
+	
+	int i, j;
+	double si, si1;
+	
+	// must be a power of 2
+	samplecount = _floor_power2(samplecount);
+	
+	double *sam = (double *)malloc(sizeof(double)*samplecount);
+	memcpy(sam, samples + startsample, sizeof(double)*samplecount);
+	int curSamNb = samplecount;
+	
+	int *distances = (int *)malloc(sizeof(int)*samplecount);
+	int *mins = (int *)malloc(sizeof(int)*samplecount);
+	int *maxs = (int *)malloc(sizeof(int)*samplecount);
+	int nbMins, nbMaxs;
+	
+	// algorithm parameters
+	int maxFLWTlevels = 6;
+	double maxF = 3000.;
+	int differenceLevelsN = 3;
+	double maximaThresholdRatio = 0.75;
+	
+	double ampltitudeThreshold;  
+	double theDC = 0.0;
+	
+	{ // compute ampltitudeThreshold and theDC
+		//first compute the DC and maxAMplitude
+		double maxValue = 0.0;
+		double minValue = 0.0;
+		for (i = 0; i < samplecount;i++) {
+			si = sam[i];
+			theDC = theDC + si;
+			if (si > maxValue) maxValue = si;
+			if (si < minValue) minValue = si;
+		}
+		theDC = theDC/samplecount;
+		maxValue = maxValue - theDC;
+		minValue = minValue - theDC;
+		double amplitudeMax = (maxValue > -minValue ? maxValue : -minValue);
+		
+		ampltitudeThreshold = amplitudeMax*maximaThresholdRatio;
+		//asLog("dywapitch theDC=%f ampltitudeThreshold=%f\n", theDC, ampltitudeThreshold);
+		
+	}
+	
+	// levels, start without downsampling..
+	int curLevel = 0;
+	double curModeDistance = -1.;
+	int delta;
+	
+	while(1) {
+		
+		// delta
+		delta = 44100./(_2power(curLevel)*maxF);
+		//("dywapitch doing level=%ld delta=%ld\n", curLevel, delta);
+		
+		if (curSamNb < 2) goto cleanup;
+		
+		// compute the first maximums and minumums after zero-crossing
+		// store if greater than the min threshold
+		// and if at a greater distance than delta
+		double dv, previousDV = -1000;
+		nbMins = nbMaxs = 0;   
+		int lastMinIndex = -1000000;
+		int lastmaxIndex = -1000000;
+		int findMax = 0;
+		int findMin = 0;
+		for (i = 2; i < curSamNb; i++) {
+			si = sam[i] - theDC;
+			si1 = sam[i-1] - theDC;
+			
+			if (si1 <= 0 && si > 0) findMax = 1;
+			if (si1 >= 0 && si < 0) findMin = 1;
+			
+			// min or max ?
+			dv = si - si1;
+			
+			if (previousDV > -1000) {
+				
+				if (findMin && previousDV < 0 && dv >= 0) { 
+					// minimum
+					if (fabs(si) >= ampltitudeThreshold) {
+						if (i > lastMinIndex + delta) {
+							mins[nbMins++] = i;
+							lastMinIndex = i;
+							findMin = 0;
+							//if DEBUGG then put "min ok"&&si
+							//
+						} else {
+							//if DEBUGG then put "min too close to previous"&&(i - lastMinIndex)
+							//
+						}
+					} else {
+						// if DEBUGG then put "min "&abs(si)&" < thresh = "&ampltitudeThreshold
+						//--
+					}
+				}
+				
+				if (findMax && previousDV > 0 && dv <= 0) {
+					// maximum
+					if (fabs(si) >= ampltitudeThreshold) {
+						if (i > lastmaxIndex + delta) {
+							maxs[nbMaxs++] = i;
+							lastmaxIndex = i;
+							findMax = 0;
+						} else {
+							//if DEBUGG then put "max too close to previous"&&(i - lastmaxIndex)
+							//--
+						}
+					} else {
+						//if DEBUGG then put "max "&abs(si)&" < thresh = "&ampltitudeThreshold
+						//--
+					}
+				}
+			}
+			
+			previousDV = dv;
+		}
+		
+		if (nbMins == 0 && nbMaxs == 0) {
+			// no best distance !
+			//asLog("dywapitch no mins nor maxs, exiting\n");
+			
+			// if DEBUGG then put "no mins nor maxs, exiting"
+			goto cleanup;
+		}
+		//if DEBUGG then put count(maxs)&&"maxs &"&&count(mins)&&"mins"
+		
+		// maxs = [5, 20, 100,...]
+		// compute distances
+		int d;
+		memset(distances, 0, samplecount*sizeof(int));
+		for (i = 0 ; i < nbMins ; i++) {
+			for (j = 1; j < differenceLevelsN; j++) {
+				if (i+j < nbMins) {
+					d = _iabs(mins[i] - mins[i+j]);
+					//asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d);
+					distances[d] = distances[d] + 1;
+				}
+			}
+		}
+		for (i = 0 ; i < nbMaxs ; i++) {
+			for (j = 1; j < differenceLevelsN; j++) {
+				if (i+j < nbMaxs) {
+					d = _iabs(maxs[i] - maxs[i+j]);
+					//asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d);
+					distances[d] = distances[d] + 1;
+				}
+			}
+		}
+		
+		// find best summed distance
+		int bestDistance = -1;
+		int bestValue = -1;
+		for (i = 0; i< curSamNb; i++) {
+			int summed = 0;
+			for (j = -delta ; j <= delta ; j++) {
+				if (i+j >=0 && i+j < curSamNb)
+					summed += distances[i+j];
+			}
+			//asLog("dywapitch i=%ld summed=%ld bestDistance=%ld\n", i, summed, bestDistance);
+			if (summed == bestValue) {
+				if (i == 2*bestDistance)
+					bestDistance = i;
+				
+			} else if (summed > bestValue) {
+				bestValue = summed;
+				bestDistance = i;
+			}
+		}
+		//asLog("dywapitch bestDistance=%ld\n", bestDistance);
+		
+		// averaging
+		double distAvg = 0.0;
+		double nbDists = 0;
+		for (j = -delta ; j <= delta ; j++) {
+			if (bestDistance+j >=0 && bestDistance+j < samplecount) {
+				int nbDist = distances[bestDistance+j];
+				if (nbDist > 0) {
+					nbDists += nbDist;
+					distAvg += (bestDistance+j)*nbDist;
+				}
+			}
+		}
+		// this is our mode distance !
+		distAvg /= nbDists;
+		//asLog("dywapitch distAvg=%f\n", distAvg);
+		
+		// continue the levels ?
+		if (curModeDistance > -1.) {
+			double similarity = fabs(distAvg*2 - curModeDistance);
+			if (similarity <= 2*delta) {
+				//if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"ok"
+ 				//asLog("dywapitch similarity=%f OK !\n", similarity);
+				// two consecutive similar mode distances : ok !
+				pitchF = 44100./(_2power(curLevel-1)*curModeDistance);
+				goto cleanup;
+			}
+			//if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"not"
+		}
+		
+		// not similar, continue next level
+		curModeDistance = distAvg;
+		
+		curLevel = curLevel + 1;
+		if (curLevel >= maxFLWTlevels) {
+			// put "max levels reached, exiting"
+ 			//asLog("dywapitch max levels reached, exiting\n");
+			goto cleanup;
+		}
+		
+		// downsample
+		if (curSamNb < 2) {
+ 			//asLog("dywapitch not enough samples, exiting\n");
+			goto cleanup;
+		}
+		for (i = 0; i < curSamNb/2; i++) {
+			sam[i] = (sam[2*i] + sam[2*i + 1])/2.;
+		}
+		curSamNb /= 2;
+	}
+	
+	///
+cleanup:
+	free(distances);
+	free(mins);
+	free(maxs);
+	free(sam);
+	
+	return pitchF;
+}
+
+// ***********************************
+// the dynamic postprocess
+// ***********************************
+
+/***
+It states: 
+ - a pitch cannot change much all of a sudden (20%) (impossible humanly,
+ so if such a situation happens, consider that it is a mistake and drop it. 
+ - a pitch cannot double or be divided by 2 all of a sudden : it is an
+ algorithm side-effect : divide it or double it by 2. 
+ - a lonely voiced pitch cannot happen, nor can a sudden drop in the middle
+ of a voiced segment. Smooth the plot. 
+***/
+
+double _dywapitch_dynamicprocess(dywapitchtracker *pitchtracker, double pitch) {
+	
+	// equivalence
+	if (pitch == 0.0) pitch = -1.0;
+	
+	//
+	double estimatedPitch = -1;
+	double acceptedError = 0.2f;
+	int maxConfidence = 5;
+	
+	if (pitch != -1) {
+		// I have a pitch here
+		
+		if (pitchtracker->_prevPitch == -1) {
+			// no previous
+			estimatedPitch = pitch;
+			pitchtracker->_prevPitch = pitch;
+			pitchtracker->_pitchConfidence = 1;
+			
+		} else if (abs(pitchtracker->_prevPitch - pitch)/pitch < acceptedError) {
+			// similar : remember and increment pitch
+			pitchtracker->_prevPitch = pitch;
+			estimatedPitch = pitch;
+			pitchtracker->_pitchConfidence = min(maxConfidence, pitchtracker->_pitchConfidence + 1); // maximum 3
+			
+		} else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 2.*pitch)/(2.*pitch) < acceptedError) {
+			// close to half the last pitch, which is trusted
+			estimatedPitch = 2.*pitch;
+			pitchtracker->_prevPitch = estimatedPitch;
+			
+		} else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 0.5*pitch)/(0.5*pitch) < acceptedError) {
+			// close to twice the last pitch, which is trusted
+			estimatedPitch = 0.5*pitch;
+			pitchtracker->_prevPitch = estimatedPitch;
+			
+		} else {
+			// nothing like this : very different value
+			if (pitchtracker->_pitchConfidence >= 1) {
+				// previous trusted : keep previous
+				estimatedPitch = pitchtracker->_prevPitch;
+				pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1);
+			} else {
+				// previous not trusted : take current
+				estimatedPitch = pitch;
+				pitchtracker->_prevPitch = pitch;
+				pitchtracker->_pitchConfidence = 1;
+			}
+		}
+		
+	} else {
+		// no pitch now
+		if (pitchtracker->_prevPitch != -1) {
+			// was pitch before
+			if (pitchtracker->_pitchConfidence >= 1) {
+				// continue previous
+				estimatedPitch = pitchtracker->_prevPitch;
+				pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1);
+			} else {
+				pitchtracker->_prevPitch = -1;
+				estimatedPitch = -1.;
+				pitchtracker->_pitchConfidence = 0;
+			}
+		}
+	}
+	
+	// put "_pitchConfidence="&pitchtracker->_pitchConfidence
+	if (pitchtracker->_pitchConfidence >= 1) {
+		// ok
+		pitch = estimatedPitch;
+	} else {
+		pitch = -1;
+	}
+	
+	// equivalence
+	if (pitch == -1) pitch = 0.0;
+	
+	return pitch;
+}
+
+
+// ************************************
+// the API main entry points
+// ************************************
+
+void dywapitch_inittracking(dywapitchtracker *pitchtracker) {
+	pitchtracker->_prevPitch = -1.;
+	pitchtracker->_pitchConfidence = -1;
+}
+
+double dywapitch_computepitch(dywapitchtracker *pitchtracker, double * samples, int startsample, int samplecount) {
+	double raw_pitch = _dywapitch_computeWaveletPitch(samples, startsample, samplecount);
+	return _dywapitch_dynamicprocess(pitchtracker, raw_pitch);
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dywapitchtrack/dywapitchtrack.h	Fri May 31 22:44:03 2013 +0100
@@ -0,0 +1,110 @@
+/* dywapitchtrack.h
+ 
+ Dynamic Wavelet Algorithm Pitch Tracking library
+ Released under the MIT open source licence
+ 
+ Copyright (c) 2010 Antoine Schmitt
+  
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+/* Documentation
+ 
+ The dywapitchtrack library computes the pitch of an audio stream in real time.
+ 
+ The pitch is the main frequency of the waveform (the 'note' being played or sung).
+ It is expressed as a float in Hz.
+ 
+ Unlike the human ear, pitch detection is difficult to achieve for computers. Many
+ algorithms have been designed and experimented, but there is no 'best' algorithm.
+ They all depend on the context and the tradeoffs acceptable in terms of speed and
+ latency. The context includes the quality and 'cleanness' of the audio : obviously
+ polyphonic sounds (multiple instruments playing different notes at the same time)
+ are extremely difficult to track, percussive or noisy audio has no pitch, most
+ real-life audio have some noisy moments, some instruments have a lot of harmonics,
+ etc...
+ 
+ The dywapitchtrack is based on a custom-tailored algorithm which is of very high quality:
+ both very accurate (precision < 0.05 semitones), very low latency (< 23 ms) and
+ very low error rate. It has been  thoroughly tested on human voice.
+ 
+ It can best be described as a dynamic wavelet algorithm (dywa):
+ 
+ The heart of the algorithm is a very powerful wavelet algorithm, described in a paper
+ by Eric Larson and Ross Maddox : "Real-Time Time-Domain Pitch Tracking Using Wavelets"
+ http://online.physics.uiuc.edu/courses/phys498pom/NSF_REU_Reports/2005_reu/Real-Time_Time-Domain_Pitch_Tracking_Using_Wavelets.pdf
+ 
+ This central algorithm has been improved by adding dynamic tracking, to reduce the
+ common problems of frequency-halving and voiced/unvoiced errors. This dynamic tracking
+ explains the need for a tracking structure (dywapitchtracker). The dynamic tracking assumes
+ that the main function dywapitch_computepitch is called repeatedly, as it follows the pitch
+ over time and makes assumptions about human voice capabilities and reallife conditions
+ (as documented inside the code).
+ 
+ Note : The algorithm currently assumes a 44100Hz audio sampling rate. If you use a different
+ samplerate, you can just multiply the resulting pitch by the ratio between your samplerate and 44100.
+*/
+
+/* Usage
+
+ // Allocate your audio buffers and start the audio stream.
+ // Allocate a 'dywapitchtracker' structure.
+ // Start the pitch tracking by calling 'dywapitch_inittracking'.
+ dywapitchtracker pitchtracker;
+ dywapitch_inittracking(&pitchtracker);
+ 
+ // For each available audio buffer, call 'dywapitch_computepitch'
+ double thepitch = dywapitch_computepitch(&pitchtracker, samples, start, count);
+ 
+*/
+
+#ifndef dywapitchtrack__H
+#define dywapitchtrack__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// structure to hold tracking data
+typedef struct _dywapitchtracker {
+	double	_prevPitch;
+	int		_pitchConfidence;
+} dywapitchtracker;
+
+// returns the number of samples needed to compute pitch for fequencies equal and above the given minFreq (in Hz)
+// useful to allocate large enough audio buffer 
+// ex : for frequencies above 130Hz, you need 1024 samples (assuming a 44100 Hz samplerate)
+int dywapitch_neededsamplecount(int minFreq);
+
+// call before computing any pitch, passing an allocated dywapitchtracker structure
+void dywapitch_inittracking(dywapitchtracker *pitchtracker);
+
+// computes the pitch. Pass the inited dywapitchtracker structure
+// samples : a pointer to the sample buffer
+// startsample : the index of teh first sample to use in teh sample buffer
+// samplecount : the number of samples to use to compte the pitch
+// return 0.0 if no pitch was found (sound too low, noise, etc..)
+double dywapitch_computepitch(dywapitchtracker *pitchtracker, double * samples, int startsample, int samplecount);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
+
--- a/src/init.c	Fri May 31 22:43:17 2013 +0100
+++ b/src/init.c	Fri May 31 22:44:03 2013 +0100
@@ -35,6 +35,7 @@
 
 #include "../xtract/libxtract.h"
 #include "xtract_window_private.h"
+#include "xtract_scalar_private.h"
 #define DEFINE_GLOBALS
 #include "xtract_globals_private.h"
 
@@ -361,6 +362,11 @@
 
 }
 
+int xtract_init_wavelet_f0_state(void)
+{
+    dywapitch_inittracking(&wavelet_f0_state);
+}
+
 double *xtract_init_window(const int N, const int type)
 {
     double *window;
--- a/src/libxtract.c	Fri May 31 22:43:17 2013 +0100
+++ b/src/libxtract.c	Fri May 31 22:44:03 2013 +0100
@@ -68,6 +68,7 @@
     xtract_hps,
     xtract_f0,
     xtract_failsafe_f0,
+    xtract_wavelet_f0,
     /* xtract_delta.h */
     xtract_lnorm,
     xtract_flux,
--- a/src/scalar.c	Fri May 31 22:43:17 2013 +0100
+++ b/src/scalar.c	Fri May 31 22:44:03 2013 +0100
@@ -28,9 +28,12 @@
 #include <stdio.h>
 #include <math.h>
 
-#include "../xtract/libxtract.h"
-#include "../xtract/xtract_helper.h"
+#include "dywapitchtrack/dywapitchtrack.h"
+
+#include "xtract/libxtract.h"
+#include "xtract/xtract_helper.h"
 #include "xtract_macros_private.h"
+#include "xtract_globals_private.h"
 
 int xtract_mean(const double *data, const int N, const void *argv, double *result)
 {
@@ -953,7 +956,6 @@
 
     if(return_code == XTRACT_NO_RESULT)
     {
-
         sr = *(double *)argv;
         if(sr == 0)
             sr = 44100.0;
@@ -975,3 +977,18 @@
 
 }
 
+int xtract_wavelet_f0(const double *data, const int N, const void *argv, double *result)
+{
+    double sr = *(double *)argv;
+
+    *result = dywapitch_computepitch(&wavelet_f0_state, data, 0, N);
+
+    if (*result == 0.0)
+    {
+        return XTRACT_NO_RESULT;
+    }
+
+    return XTRACT_SUCCESS;
+}
+
+
--- a/src/xtract_globals_private.h	Fri May 31 22:43:17 2013 +0100
+++ b/src/xtract_globals_private.h	Fri May 31 22:44:03 2013 +0100
@@ -27,6 +27,7 @@
 #define XTRACT_GLOBALS_PRIVATE_H
 
 #include "fft.h"
+#include "dywapitchtrack/dywapitchtrack.h"
 
 #ifdef DEFINE_GLOBALS
 #define GLOBAL
@@ -46,5 +47,7 @@
 GLOBAL xtract_vdsp_data vdsp_data_autocorrelation_fft;
 #endif
 
+GLOBAL dywapitchtracker wavelet_f0_state;
+
 #endif /* Header guard */
 
--- a/xtract/libxtract.h	Fri May 31 22:43:17 2013 +0100
+++ b/xtract/libxtract.h	Fri May 31 22:44:03 2013 +0100
@@ -116,6 +116,7 @@
     XTRACT_HPS,
     XTRACT_F0,
     XTRACT_FAILSAFE_F0,
+    XTRACT_WAVELET_F0,
     XTRACT_LNORM,
     XTRACT_FLUX,
     XTRACT_ATTACK_TIME,
@@ -361,6 +362,9 @@
 
 #endif
 
+/** \brief A function to initialise wavelet f0 detector state */
+int xtract_init_wavelet_f0_state(void);
+
 /** \brief A structure to store a set of n_filters Mel filters */
 typedef struct xtract_mel_filter_ {
     int n_filters;
--- a/xtract/xtract_scalar.h	Fri May 31 22:43:17 2013 +0100
+++ b/xtract/xtract_scalar.h	Fri May 31 22:44:03 2013 +0100
@@ -416,6 +416,21 @@
  */
 int xtract_failsafe_f0(const double *data, const int N, const void *argv, double *result);
 
+/** \brief Extract the fundamental frequency of an input vector using wavelet-based method
+ * 
+ * \param *data: a pointer to the first element in an array of doubles representing an audio vector 
+ * \param N: the number of elements to be considered
+ * \param *argv: a pointer to a double representing the audio sample rate
+ * \param *result: the pitch of N values from the array pointed to by *data
+ *
+ * This function uses the time-domain wavelet-based method described in Larson and Maddox (2005) and
+ * implemented in the dywapitchtrack library
+ *  
+ * xtract_init_wavelet_f0_state() must be called exactly once prior to calling xtract_wavelet_f0()
+ *
+ */
+int xtract_wavelet_f0(const double *data, const int N, const void *argv, double *result);
+
 /** \brief Extract the number of non-zero elements in an input vector
  * 
  * \param *data: a pointer to the first element in an array of doubles