Mercurial > hg > audiodb
changeset 768:b9dbe4611dde
Adding Kullback-Leibler divergence as alternate distance function
author | mas01mc |
---|---|
date | Sat, 15 Oct 2011 17:28:07 +0000 |
parents | 6d0d41604aba |
children | 512797443824 |
files | Makefile audioDB-internals.h audioDB.cpp audioDB.h audioDB_API.h bindings/python/pyadb.py bindings/python/pyadbmodule.c gengetopt.in query.cpp |
diffstat | 9 files changed, 86 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Thu Jun 02 16:31:44 2011 +0000 +++ b/Makefile Sat Oct 15 17:28:07 2011 +0000 @@ -1,7 +1,7 @@ HELP2MAN=help2man GENGETOPT=gengetopt SOAPCPP2=soapcpp2 -GSOAP_INCLUDE=$(shell pkg-config --cflags gsoap++) +GSOAP_INCLUDE=-I$(HOME)/src/gsoap-2.8/gsoap -L$(HOME)/src/gsoap-2.8/gsoap GSOAP_CPP=$(shell pkg-config --libs gsoap++) GSL_INCLUDE=$(shell pkg-config --cflags gsl) LIBGSL=$(shell pkg-config --libs gsl) @@ -26,8 +26,12 @@ LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION) SHARED_LIB_FLAGS=-shared -Wl,-soname,lib$(EXECUTABLE).so.$(SOVERSION) +# set for production operation override CFLAGS+=-g -O3 -fPIC +# set for debug operation +#override CFLAGS+=-ggdb -DDEBUG -fPIC + # set to generate profile (gprof) and coverage (gcov) info #override CFLAGS+=-fprofile-arcs -ftest-coverage -pg
--- a/audioDB-internals.h Thu Jun 02 16:31:44 2011 +0000 +++ b/audioDB-internals.h Sat Oct 15 17:28:07 2011 +0000 @@ -272,6 +272,23 @@ return result; } +static inline double audiodb_kullback_leibler(double *p, double *q, size_t count) { + double a,b, tmp1, tmp2, result = 0; + while(count--){ + a = *p++; + b = *q++; + tmp1 = a * log( a / b ); + if(isnan(tmp1)) + tmp1=0.0; + tmp2 = b * log( b / a ); + if(isnan(tmp2)) + tmp2=0.0; + result += ( tmp1 + tmp2 ) / 2.0; + } + return result; +} + + static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) { while(nvectors--) { double *d1 = d; @@ -386,9 +403,9 @@ #define ADB_TRACKTABLE_ENTRY_SIZE (sizeof(uint32_t)) #define ADB_DISTANCE_TOLERANCE (1e-6) -#define ADB_DEFAULT_DATASIZE (1355U) /* in MB */ -#define ADB_DEFAULT_NTRACKS (20000U) -#define ADB_DEFAULT_DATADIM (9U) +#define ADB_DEFAULT_DATASIZE (2000U) /* in MB */ +#define ADB_DEFAULT_NTRACKS (200000U) +#define ADB_DEFAULT_DATADIM (12U) #define ADB_FIXME_LARGE_ADB_SIZE (ADB_DEFAULT_DATASIZE+1) #define ADB_FIXME_LARGE_ADB_NTRACKS (ADB_DEFAULT_NTRACKS+1)
--- a/audioDB.cpp Thu Jun 02 16:31:44 2011 +0000 +++ b/audioDB.cpp Sat Oct 15 17:28:07 2011 +0000 @@ -447,6 +447,7 @@ } // Set no_unit_norm flag + distance_kullback = args_info.distance_kullback_flag; no_unit_norming = args_info.no_unit_norming_flag; lsh_use_u_functions = args_info.lsh_use_u_functions_flag; @@ -919,7 +920,10 @@ case O2_SEQUENCE_QUERY: case O2_N_SEQUENCE_QUERY: qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK; - qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; + if (distance_kullback) + qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; + else + qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; qspec.params.npoints = pointNN; qspec.params.ntracks = trackNN; switch(queryType) { @@ -941,7 +945,10 @@ break; case O2_ONE_TO_ONE_N_SEQUENCE_QUERY: qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; - qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; + if (distance_kullback) + qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; + else + qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; qspec.params.npoints = 0; qspec.params.ntracks = 0; if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { @@ -1096,8 +1103,10 @@ spec.qid.sequence_length = sequenceLength; spec.qid.flags |= usingQueryPoint ? 0 : ADB_QID_FLAG_EXHAUSTIVE; spec.qid.sequence_start = queryPoint; - - spec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; + if (distance_kullback) + spec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; + else + spec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; spec.params.accumulation = ADB_ACCUMULATION_DB; spec.params.npoints = nsamples;
--- a/audioDB.h Thu Jun 02 16:31:44 2011 +0000 +++ b/audioDB.h Sat Oct 15 17:28:07 2011 +0000 @@ -71,6 +71,7 @@ #define COM_EXHAUSTIVE "--exhaustive" #define COM_LSH_EXACT "--lsh_exact" #define COM_NO_UNIT_NORMING "--no_unit_norming" +#define COM_DISTANCE_KULLBACK "--distance_kullback" #define O2_DEFAULT_POINTNN (10U) #define O2_DEFAULT_TRACKNN (10U) @@ -236,6 +237,7 @@ unsigned sequenceHop; bool normalizedDistance; bool no_unit_norming; + bool distance_kullback; unsigned queryPoint; unsigned usingQueryPoint; unsigned usingTimes; @@ -369,6 +371,7 @@ sequenceHop(1), \ normalizedDistance(true), \ no_unit_norming(false), \ + distance_kullback(false), \ queryPoint(0), \ usingQueryPoint(0), \ usingTimes(0), \
--- a/audioDB_API.h Thu Jun 02 16:31:44 2011 +0000 +++ b/audioDB_API.h Sat Oct 15 17:28:07 2011 +0000 @@ -88,6 +88,7 @@ #define ADB_DISTANCE_DOT_PRODUCT 1 #define ADB_DISTANCE_EUCLIDEAN_NORMED 2 #define ADB_DISTANCE_EUCLIDEAN 3 +#define ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE 4 typedef struct adb_query_parameters { uint32_t accumulation;
--- a/bindings/python/pyadb.py Thu Jun 02 16:31:44 2011 +0000 +++ b/bindings/python/pyadb.py Sat Oct 15 17:28:07 2011 +0000 @@ -116,7 +116,7 @@ exhaustive : boolean - True for exhaustive (false by default),\n\ falsePositives: boolean - True to keep fps (false by defaults),\n\ accumulation : [\"db\"|\"track\"|\"one2one\"] (\"db\" by default),\n\ - distance : [\"dot\"|\"eucNorm\"|\"euclidean\"] (\"dot\" by default),\n\ + distance : [\"dot\"|\"eucNorm\"|\"euclidean\"|\"kullback\"] (\"dot\" by default),\n\ npoints : int number of points per track,\n\ ntracks : max number of results returned in db accu mode,\n\ includeKeys : list of strings to include (use all by default),\n\
--- a/bindings/python/pyadbmodule.c Thu Jun 02 16:31:44 2011 +0000 +++ b/bindings/python/pyadbmodule.c Sat Oct 15 17:28:07 2011 +0000 @@ -419,15 +419,17 @@ return NULL; } if (strcmp(distMode, "dot")==0){ - spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; + spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; }else if (strcmp(distMode, "eucNorm")==0){ - spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; + spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; }else if (strcmp(distMode, "euclidean")==0){ - spec->params.distance = ADB_DISTANCE_EUCLIDEAN; + spec->params.distance = ADB_DISTANCE_EUCLIDEAN; + }else if (strcmp(distMode, "kullback")==0){ + spec->params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; }else{ - PyErr_SetString(PyExc_ValueError, - "Poorly specified distance mode. distance must either be \'dot\', \'eucNorm\' or \'euclidean\'.\n"); - return NULL; + PyErr_SetString(PyExc_ValueError, + "Poorly specified distance mode. distance must either be \'dot\', \'eucNorm\' ,\'euclidean\' or \'kullback\'.\n"); + return NULL; } //set up spec->refine @@ -746,9 +748,11 @@ spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; }else if (strcmp(distMode, "euclidean")==0){ spec->params.distance = ADB_DISTANCE_EUCLIDEAN; + }else if (strcmp(distMode, "kullback")==0){ + spec->params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; }else{ PyErr_SetString(PyExc_ValueError, - "Poorly specified distance mode. distance must either be \'dot\', \'eucNorm\' or \'euclidean\'.\n"); + "Poorly specified distance mode. distance must either be \'dot\', \'eucNorm\' ,\'euclidean\' or \'kullback\'.\n"); return NULL; }
--- a/gengetopt.in Thu Jun 02 16:31:44 2011 +0000 +++ b/gengetopt.in Sat Oct 15 17:28:07 2011 +0000 @@ -80,6 +80,7 @@ section "Normalization control parameters" sectiondesc="These parameters control the normalization of feaures at query time\n" option "no_unit_norming" - "do not unit norm features when querying an L2Norm databases." flag off +option "distance_kullback" - "use symmetric kullback divergence for distance function" flag off section "Web Services" sectiondesc="These commands enable the database process to establish a connection via the internet and operate as separate client and server processes.\n"
--- a/query.cpp Thu Jun 02 16:31:44 2011 +0000 +++ b/query.cpp Sat Oct 15 17:28:07 2011 +0000 @@ -55,6 +55,7 @@ break; case ADB_DISTANCE_EUCLIDEAN_NORMED: case ADB_DISTANCE_EUCLIDEAN: + case ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE: switch(qspec->params.accumulation) { case ADB_ACCUMULATION_DB: qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints); @@ -108,6 +109,10 @@ static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { unsigned int j, k, l, w; double *dp, *qp, *sp; + double a,b, tmp1; +#ifdef SYMMETRIC_KL + double tmp2; +#endif const unsigned wL = spec->qid.sequence_length; @@ -127,8 +132,27 @@ dp = &D[j][k]; // point to correlation cell j,k *dp = 0.0; // initialize correlation cell l = adb->header->dim; // size of vectors - while(l--) - *dp += *qp++ * *sp++; + if (spec->params.distance!=ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE){ + while(l--) + *dp += *qp++ * *sp++; + } + else{ // KL + while(l--){ + a = *qp++; + b = *sp++; + tmp1 = a * log( a / b ); + if(isnan(tmp1)) + tmp1=0.0; +#ifdef SYMMETRIC_KL + tmp2 = b * log( b / a ); + if(isnan(tmp2)) + tmp2=0.0; + *dp += ( tmp1 + tmp2 ) / 2.0; +#else + *dp += tmp1; +#endif + } + } } double* spd; @@ -461,7 +485,11 @@ if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ // Compute distance - dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); + dist = 1.0e9; + if (spec->params.distance==ADB_DISTANCE_EUCLIDEAN_NORMED || spec->params.distance==ADB_DISTANCE_EUCLIDEAN) + dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); + else if(spec->params.distance==ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE) + dist = audiodb_kullback_leibler(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); double qn = qpointers->l2norm[qPos]; double sn = dbpointers.l2norm[sPos]; switch(spec->params.distance) {