Mercurial > hg > audiodb
changeset 444:4fe90fd568fc api-inversion
No more audioDB::set_up_query{,_from_key}
Go through audiodb_query_spec_qpointers() instead. It's a little bit
horrible, but less horrible than two almost-identical separate
functions...
author | mas01cr |
---|---|
date | Wed, 24 Dec 2008 10:56:37 +0000 |
parents | cb44e57a96fa |
children | d1771f436ff7 |
files | audioDB-internals.h audioDB.h index.cpp query.cpp |
diffstat | 4 files changed, 61 insertions(+), 212 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB-internals.h Wed Dec 24 10:56:33 2008 +0000 +++ b/audioDB-internals.h Wed Dec 24 10:56:37 2008 +0000 @@ -185,3 +185,4 @@ int audiodb_read_data(adb_t *, int, int, double **, size_t *); int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *); int audiodb_free_datum(adb_datum_t *); +int audiodb_query_spec_qpointers(adb_t *, adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
--- a/audioDB.h Wed Dec 24 10:56:33 2008 +0000 +++ b/audioDB.h Wed Dec 24 10:56:37 2008 +0000 @@ -330,8 +330,6 @@ void error(const char* a, const char* b = "", const char *sysFunc = 0); void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); - void set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers); - void set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex); void query_loop(adb_query_spec_t *spec, Uns32T queryIndex); void query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers); void initRNG();
--- a/index.cpp Wed Dec 24 10:56:33 2008 +0000 +++ b/index.cpp Wed Dec 24 10:56:37 2008 +0000 @@ -599,10 +599,9 @@ char* database = index_get_name(dbName, radius, sequenceLength); - if(query_from_key) - set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); - else - set_up_query(spec, &query_data, &query, &qpointers); + if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { + error("failed to set up qpointers"); + } VERB_LOG(1, "retrieving tracks...");
--- a/query.cpp Wed Dec 24 10:56:33 2008 +0000 +++ b/query.cpp Wed Dec 24 10:56:37 2008 +0000 @@ -4,7 +4,7 @@ #include "audioDB-internals.h" #include "accumulators.h" -static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) { +bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) { if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { return false; @@ -27,6 +27,7 @@ initTables(dbName, inFile); adb_query_spec_t qspec; + adb_datum_t datum = {0}; qspec.refine.flags = 0; /* FIXME: trackFile / ADB_REFINE_KEYLIST */ @@ -53,7 +54,46 @@ qspec.refine.flags |= ADB_REFINE_HOP_SIZE; } - /* FIXME qspec.qid.datum */ + if(query_from_key) { + datum.key = key; + } else { + int fd; + struct stat st; + + /* FIXME: around here there are all sorts of hideous leaks. */ + fd = open(inFile, O_RDONLY); + if(fd < 0) { + error("failed to open feature file", inFile); + } + fstat(fd, &st); + read(fd, &datum.dim, sizeof(uint32_t)); + datum.nvectors = (st.st_size - sizeof(uint32_t)) / (datum.dim * sizeof(double)); + datum.data = (double *) malloc(st.st_size - sizeof(uint32_t)); + read(fd, datum.data, st.st_size - sizeof(uint32_t)); + close(fd); + if(usingPower) { + uint32_t one; + fd = open(powerFileName, O_RDONLY); + if(fd < 0) { + error("failed to open power file", powerFileName); + } + read(fd, &one, sizeof(uint32_t)); + if(one != 1) { + error("malformed power file dimensionality", powerFileName); + } + datum.power = (double *) malloc(datum.nvectors * sizeof(double)); + if(read(fd, datum.power, datum.nvectors * sizeof(double)) != (ssize_t) (datum.nvectors * sizeof(double))) { + error("malformed power file", powerFileName); + } + close(fd); + } + if(usingTimes) { + datum.times = (double *) malloc(2 * datum.nvectors * sizeof(double)); + insertTimeStamps(datum.nvectors, timesFile, datum.times); + } + } + + qspec.qid.datum = &datum; qspec.qid.sequence_length = sequenceLength; qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE; qspec.qid.sequence_start = queryPoint; @@ -307,7 +347,7 @@ } } -static int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { +int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { adb_datum_t *datum; adb_datum_t d = {0}; uint32_t nvectors; @@ -381,7 +421,8 @@ that we care about (l2norm/power/mean duration). (This bit could conceivably become a new function) */ nvectors = d.nvectors; - if(sequence_start > nvectors - sequence_length) { + /* FIXME: check the overflow logic here */ + if(sequence_start + sequence_length > nvectors) { /* is there something to free? goto error */ return 1; } @@ -421,213 +462,24 @@ } else { *vector = *vector_data + spec->qid.sequence_start * d.dim; qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; - qpointers->power = qpointers->power_data + spec->qid.sequence_start; + if(d.power) { + qpointers->power = qpointers->power_data + spec->qid.sequence_start; + } + /* FIXME: this is a little bit ugly. No, a lot ugly. But at the + * moment this is how query_loop() knows when to stop, so for + * now... */ + qpointers->nvectors = sequence_length; } - /* Clean up: free any bits of datum that we have ourselves * allocated. */ if(datum != &d) { audiodb_free_datum(&d); } + + return 0; } -// These names deserve some unpicking. The names starting with a "q" -// are pointers to the query, norm and power vectors; the names -// starting with "v" are things that will end up pointing to the -// actual query point's information. -- CSR, 2007-12-05 -void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) { - uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); - qpointers->nvectors = nvectors; - - uint32_t sequence_length = spec->qid.sequence_length; - if(!(dbH->flags & O2_FLAG_L2NORM)) { - error("Database must be L2 normed for sequence query","use -L2NORM"); - } - - if(nvectors < sequence_length) { - error("Query shorter than requested sequence length", "maybe use -l"); - } - - VERB_LOG(1, "performing norms... "); - - *qp = new double[nvectors * dbH->dim]; - memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double)); - qpointers->l2norm_data = new double[nvectors]; - audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data); - - audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); - audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); - - if (usingPower) { - qpointers->power_data = new double[nvectors]; - if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { - error("error seeking to data", powerFileName, "lseek"); - } - int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double)); - if (count == -1) { - error("error reading data", powerFileName, "read"); - } - if ((unsigned) count != nvectors * sizeof(double)) { - error("short read", powerFileName); - } - - audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); - audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); - } - - if (usingTimes) { - unsigned int k; - qpointers->mean_duration = new double[1]; - *qpointers->mean_duration = 0.0; - double *querydurs = new double[nvectors]; - double *timesdata = new double[2*nvectors]; - insertTimeStamps(nvectors, timesFile, timesdata); - for(k = 0; k < nvectors; k++) { - querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; - *qpointers->mean_duration += querydurs[k]; - } - *qpointers->mean_duration /= k; - - VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); - - delete [] querydurs; - delete [] timesdata; - } - - // Defaults, for exhaustive search (!usingQueryPoint) - *vqp = *qp; - qpointers->l2norm = qpointers->l2norm_data; - qpointers->power = qpointers->power_data; - - if(usingQueryPoint) { - if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { - error("queryPoint >= nvectors-sequence_length+1 in query"); - } else { - VERB_LOG(1, "query point: %u\n", queryPoint); - *vqp = *qp + queryPoint * dbH->dim; - qpointers->l2norm = qpointers->l2norm_data + queryPoint; - if (usingPower) { - qpointers->power = qpointers->power_data + queryPoint; - } - qpointers->nvectors = sequence_length; - } - } -} - -// Does the same as set_up_query(...) but from database features instead of from a file -// Constructs the same outputs as set_up_query -void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) { - uint32_t sequence_length = spec->qid.sequence_length; - if(!trackTable) - error("trackTable not initialized","set_up_query_from_key"); - - if(!(dbH->flags & O2_FLAG_L2NORM)) { - error("Database must be L2 normed for sequence query","use -L2NORM"); - } - - if(dbH->flags & O2_FLAG_POWER) - usingPower = true; - - if(dbH->flags & O2_FLAG_TIMES) - usingTimes = true; - - uint32_t nvectors = trackTable[queryIndex]; - qpointers->nvectors = nvectors; - if(nvectors < sequence_length) { - error("Query shorter than requested sequence length", "maybe use -l"); - } - - VERB_LOG(1, "performing norms... "); - - // For LARGE_ADB load query features from file - if( dbH->flags & O2_FLAG_LARGE_ADB ){ - if(infid>0) - close(infid); - char* prefixedString = new char[O2_MAXFILESTR]; - char* tmpStr = prefixedString; - strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); - prefix_name(&prefixedString, adb_feature_root); - if(tmpStr!=prefixedString) - delete[] tmpStr; - initInputFile(prefixedString, false); // nommap, file pointer at correct position - size_t allocatedSize = 0; - if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize)) - error("failed to read data"); // over-writes qp and allocatedSize - // Consistency check on allocated memory and query feature size - if(nvectors*sizeof(double)*dbH->dim != allocatedSize) - error("Query memory allocation failed consitency check","set_up_query_from_key"); - // Allocated and calculate auxillary sequences: l2norm and power - init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power); - } - else{ // Load from self-contained ADB database - // Read query feature vectors from database - *qp = NULL; - lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); - size_t allocatedSize = 0; - if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize)) - error("failed to read data"); - // Consistency check on allocated memory and query feature size - if(nvectors*sizeof(double)*dbH->dim != allocatedSize) - error("Query memory allocation failed consitency check","set_up_query_from_key"); - - Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors - // Copy L2 norm partial-sum coefficients - assert(qpointers->l2norm_data = new double[nvectors]); - memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double)); - audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); - audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); - - if( usingPower ){ - // Copy Power partial-sum coefficients - assert(qpointers->power_data = new double[nvectors]); - memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double)); - audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); - audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); - } - - if (usingTimes) { - unsigned int k; - qpointers->mean_duration = new double[1]; - *qpointers->mean_duration = 0.0; - double *querydurs = new double[nvectors]; - double *timesdata = new double[nvectors*2]; - assert(querydurs && timesdata); - memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double)); - for(k = 0; k < nvectors; k++) { - querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; - *qpointers->mean_duration += querydurs[k]; - } - *qpointers->mean_duration /= k; - - VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); - - delete [] querydurs; - delete [] timesdata; - } - } - - // Defaults, for exhaustive search (!usingQueryPoint) - *vqp = *qp; - qpointers->l2norm = qpointers->l2norm_data; - qpointers->power = qpointers->power_data; - - if(usingQueryPoint) { - if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { - error("queryPoint >= nvectors-sequence_length+1 in query"); - } else { - VERB_LOG(1, "query point: %u\n", queryPoint); - *vqp = *qp + queryPoint * dbH->dim; - qpointers->l2norm = qpointers->l2norm_data + queryPoint; - if (usingPower) { - qpointers->power = qpointers->power_data + queryPoint; - } - qpointers->nvectors = sequence_length; - } - } -} - - // FIXME: this is not the right name; we're not actually setting up // the database, but copying various bits of it out of mmap()ed tables // in order to reduce seeks. @@ -854,10 +706,9 @@ if( dbH->flags & O2_FLAG_LARGE_ADB ) error("error: LARGE_ADB requires indexed query"); - if(query_from_key) - set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); - else - set_up_query(spec, &query_data, &query, &qpointers); + if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { + error("failed to set up qpointers"); + } if(audiodb_set_up_db(adb, spec, &dbpointers)) { error("failed to set up db");