# HG changeset patch
# User mas01mc
# Date 1233236687 0
# Node ID 561339c7fd2609d4748351c185fa484ee7946c17
# Parent 7ee6a2701d90d09ce7fb495c088330514103b5a8
Added audiodb_datum_qpointers_partial() to make exact evaluation of LSH results more efficient.
This routine calculates partial sums only for those vectors needed.

diff -r 7ee6a2701d90 -r 561339c7fd26 Makefile
--- a/Makefile	Wed Jan 28 18:55:46 2009 +0000
+++ b/Makefile	Thu Jan 29 13:44:47 2009 +0000
@@ -17,19 +17,17 @@
 MINORVERSION=0
 LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION)
 
-override CFLAGS+=-ggdb -g -fPIC
+override CFLAGS+=-O3 -fPIC -pg
 
 # set to DUMP hashtables on QUERY load
 #override CFLAGS+=-DLSH_DUMP_CORE_TABLES
 
 # set to turn on debugging information for LSH hashtables
-override CFLAGS+=-D_LSH_DEBUG_
+#override CFLAGS+=-D_LSH_DEBUG_
 
 # set to increase multiple probes in LSH QUERY (allowable range = 1 ... lsh_k*2)
 #override CFLAGS+=-DLSH_MULTI_PROBE_COUNT=10
 
-
-
 ifeq ($(shell uname),Linux)
 override CFLAGS+=-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 endif
diff -r 7ee6a2701d90 -r 561339c7fd26 audioDB-internals.h
--- a/audioDB-internals.h	Wed Jan 28 18:55:46 2009 +0000
+++ b/audioDB-internals.h	Thu Jan 29 13:44:47 2009 +0000
@@ -297,6 +297,7 @@
 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
 int audiodb_free_datum(adb_datum_t *);
 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
+int audiodb_datum_qpointers_partial(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *, adb_qstate_internal_t *);
 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
diff -r 7ee6a2701d90 -r 561339c7fd26 query-indexed.cpp
--- a/query-indexed.cpp	Wed Jan 28 18:55:46 2009 +0000
+++ b/query-indexed.cpp	Thu Jan 29 13:44:47 2009 +0000
@@ -115,21 +115,11 @@
     return -1;
   }
 
-#ifdef _LSH_DEBUG_
-  cout << "spec_qpointers...";
-  cout.flush();
-#endif
-
   if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
     delete [] database;
     return -1;
   }
 
-#ifdef _LSH_DEBUG_
-  cout << "done" << endl;
-  cout.flush();
-#endif
-
   uint32_t Nq = (qpointers.nvectors > ADB_LSH_MAXTRACKLEN ? ADB_LSH_MAXTRACKLEN : qpointers.nvectors) - sequence_length + 1;
   std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length);
 
diff -r 7ee6a2701d90 -r 561339c7fd26 query.cpp
--- a/query.cpp	Wed Jan 28 18:55:46 2009 +0000
+++ b/query.cpp	Thu Jan 29 13:44:47 2009 +0000
@@ -280,6 +280,109 @@
   return 0;
 }
 
+/*
+ * Prepare query pointers for one database track, computing values only
+ * at the start positions named in the exact-evaluation queue.
+ *
+ * A copy of qstate->exact_evaluation_queue is consumed from its top for
+ * as long as entries carry the trackID of the top entry; the caller's
+ * queue is not modified.  For each such entry, the sequence_length
+ * vectors starting at its spos are copied from d->data into
+ * *vector_data, and element spos of l2norm_data (and of power_data when
+ * d->power is set) receives the value for the sequence starting there.
+ * All other elements of those buffers are left uninitialised.
+ *
+ * On success returns 0 with *vector_data, qpointers->l2norm_data,
+ * qpointers->power_data (if d->power) and qpointers->mean_duration (if
+ * d->times) newly allocated with new[].  Returns 1, after releasing
+ * everything allocated here, if a queued sequence does not fit inside
+ * the track.
+ */
+int audiodb_datum_qpointers_partial(adb_datum_t *d, uint32_t sequence_length, double **vector_data,
+                                    double **vector, adb_qpointers_internal_t *qpointers,
+                                    adb_qstate_internal_t *qstate){
+  uint32_t nvectors = d->nvectors;
+  qpointers->nvectors = nvectors;
+  /* Take a copy so that popping entries leaves the caller's queue intact. */
+  std::priority_queue<PointPair> ppairs(*qstate->exact_evaluation_queue);
+
+  /* new[] takes element counts; sizing these in bytes over-allocated. */
+  *vector_data = new double[(size_t)nvectors * d->dim];
+  qpointers->l2norm_data = new double[nvectors];
+  if(d->power)
+    qpointers->power_data = new double[nvectors];
+
+  /*
+   * Each sequence is reduced in scratch space rather than in place:
+   * reducing inside l2norm_data/power_data would read elements that
+   * were never written, and overwrite results already stored for
+   * neighbouring positions when sequences overlap.
+   */
+  double *scratch = new double[sequence_length];
+  size_t seq_len_dbl = (size_t)sequence_length * d->dim * sizeof(double);
+  int err = 0;
+
+  if(!ppairs.empty()) {
+    uint32_t tid = ppairs.top().trackID;
+
+    while(!ppairs.empty() && ppairs.top().trackID == tid) {
+      uint32_t spos = ppairs.top().spos;
+      ppairs.pop();
+
+      /* Refuse a sequence that is empty or runs off the end of the track. */
+      if(sequence_length == 0 || sequence_length > nvectors || spos > nvectors - sequence_length) {
+        err = 1;
+        break;
+      }
+
+      size_t spos_dim = (size_t)spos * d->dim;
+      memcpy(*vector_data + spos_dim, d->data + spos_dim, seq_len_dbl);
+
+      /* The norm helpers are assumed to leave the result in element 0. */
+      audiodb_l2norm_buffer(*vector_data + spos_dim, d->dim, sequence_length, scratch);
+      audiodb_sequence_sum(scratch, sequence_length, sequence_length);
+      audiodb_sequence_sqrt(scratch, sequence_length, sequence_length);
+      qpointers->l2norm_data[spos] = scratch[0];
+
+      if(d->power) {
+        /* d->power is indexed per vector, so copy sequence_length doubles. */
+        memcpy(scratch, d->power + spos, sequence_length * sizeof(double));
+        audiodb_sequence_sum(scratch, sequence_length, sequence_length);
+        audiodb_sequence_average(scratch, sequence_length, sequence_length);
+        qpointers->power_data[spos] = scratch[0];
+      }
+    }
+  }
+
+  delete [] scratch;
+
+  if(err) {
+    delete [] *vector_data;
+    *vector_data = 0;
+    delete [] qpointers->l2norm_data;
+    qpointers->l2norm_data = 0;
+    if(d->power) {
+      delete [] qpointers->power_data;
+      qpointers->power_data = 0;
+    }
+    return 1;
+  }
+
+  if(d->times) {
+    qpointers->mean_duration = new double[1];
+    *qpointers->mean_duration = 0;
+    for(unsigned int k = 0; k < nvectors; k++) {
+      *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
+    }
+    *qpointers->mean_duration /= nvectors;
+  }
+
+  *vector = *vector_data;
+  qpointers->l2norm = qpointers->l2norm_data;
+  qpointers->power = qpointers->power_data;
+  return 0;
+}
+
 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
   adb_datum_t *datum;
   adb_datum_t d = {0};
@@ -466,7 +569,7 @@
       delete qstate->exact_evaluation_queue;
       return 1;
     }
-    if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
+    if(audiodb_datum_qpointers_partial(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers, qstate)) {
       delete qstate->exact_evaluation_queue;
       audiodb_free_datum(&d);
       return 1;