Mercurial > hg > audiodb
diff query.cpp @ 498:342822c2d49a
Merge api-inversion branch (-r656:771, but I don't expect to return to
that branch) into the trunk.
I expect there to be minor performance regressions (e.g. in the SOAP
server index caching, which I have forcibly removed) and minor
unplugged memory leaks (e.g. in audioDB::query(), where I don't free up
the datum). I hope that these leaks and performance regressions can be
plugged in short order. I also expect that some (but maybe not all) of
the issues currently addressed in the memory-leaks branch are superseded
or fixed by this merge.
There remains much work to be done; go forth and do it.
author | mas01cr |
---|---|
date | Sat, 10 Jan 2009 16:47:57 +0000 |
parents | fd890d2b38da |
children | cc2b97d020b1 |
line wrap: on
line diff
--- a/query.cpp Sat Jan 10 11:11:27 2009 +0000 +++ b/query.cpp Sat Jan 10 16:47:57 2009 +0000 @@ -1,194 +1,132 @@ #include "audioDB.h" -#include "reporter.h" +#include "audioDB-internals.h" +#include "accumulators.h" -bool audioDB::powers_acceptable(double p1, double p2) { - if (use_absolute_threshold) { - if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { +bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) { + if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { + if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { return false; } } - if (use_relative_threshold) { - if (fabs(p1-p2) > fabs(relative_threshold)) { + if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) { + if (fabs(p1-p2) > fabs(r->relative_threshold)) { return false; } } return true; } -void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { - // init database tables and dbH first - if(query_from_key) - initTables(dbName); - else - initTables(dbName, inFile); +adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) { + adb_qstate_internal_t qstate = {0}; + qstate.allowed_keys = new std::set<std::string>; + adb_query_results_t *results; + if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) { + for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) { + qstate.allowed_keys->insert(qspec->refine.include.keys[k]); + } + } else { + for(unsigned int k = 0; k < adb->header->numFiles; k++) { + qstate.allowed_keys->insert((*adb->keys)[k]); + } + } + if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) { + for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) { + qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]); + } + } - // keyKeyPos requires dbH to be initialized - if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND)) - error("Query key not found :",key); - - switch (queryType) { - case O2_POINT_QUERY: - sequenceLength = 1; - 
normalizedDistance = false; - reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); - break; - case O2_TRACK_QUERY: - sequenceLength = 1; - normalizedDistance = false; - reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); - break; - case O2_SEQUENCE_QUERY: - if(no_unit_norming) - normalizedDistance = false; - if(radius == 0) { - reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); - } else { - if(index_exists(dbName, radius, sequenceLength)){ - char* indexName = index_get_name(dbName, radius, sequenceLength); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); - delete[] indexName; - } - else - reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); + switch(qspec->params.distance) { + case ADB_DISTANCE_DOT_PRODUCT: + switch(qspec->params.accumulation) { + case ADB_ACCUMULATION_DB: + qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints); + break; + case ADB_ACCUMULATION_PER_TRACK: + qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks); + break; + case ADB_ACCUMULATION_ONE_TO_ONE: + qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>(); + break; + default: + goto error; } break; - case O2_N_SEQUENCE_QUERY: - if(no_unit_norming) - normalizedDistance = false; - if(radius == 0) { - reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); - } else { - if(index_exists(dbName, radius, sequenceLength)){ - char* indexName = index_get_name(dbName, radius, sequenceLength); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); - delete[] indexName; - } - else - reporter = new 
trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); - } - break; - case O2_ONE_TO_ONE_N_SEQUENCE_QUERY : - if(no_unit_norming) - normalizedDistance = false; - if(radius == 0) { - error("query-type not yet supported"); - } - else { - if(index_exists(dbName, radius, sequenceLength)){ - char* indexName = index_get_name(dbName, radius, sequenceLength); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); - delete[] indexName; - } - else - reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); + case ADB_DISTANCE_EUCLIDEAN_NORMED: + case ADB_DISTANCE_EUCLIDEAN: + switch(qspec->params.accumulation) { + case ADB_ACCUMULATION_DB: + qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints); + break; + case ADB_ACCUMULATION_PER_TRACK: + qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks); + break; + case ADB_ACCUMULATION_ONE_TO_ONE: + qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>(); + break; + default: + goto error; } break; default: - error("unrecognized queryType in query()"); - } - - // Test for index (again) here - if(radius && index_exists(dbName, radius, sequenceLength)){ - VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength); - index_query_loop(dbName, query_from_key_index); + goto error; } - else{ - VERB_LOG(1, "Calling brute-force query on database %s\n", dbName); - query_loop(dbName, query_from_key_index); + + if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) { + if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) { + goto error; + } + } else { + if(audiodb_query_loop(adb, qspec, &qstate)) { + goto error; + } } - reporter->report(fileTable, adbQueryResponse); + 
results = qstate.accumulator->get_points(); + + delete qstate.accumulator; + delete qstate.allowed_keys; + + return results; + + error: + if(qstate.accumulator) + delete qstate.accumulator; + if(qstate.allowed_keys) + delete qstate.allowed_keys; + return NULL; } -// return ordinal position of key in keyTable -// this should really be a STL hash map search -unsigned audioDB::getKeyPos(char* key){ - if(!dbH) - error("dbH not initialized","getKeyPos"); - for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) - return k; - error("Key not found",key); - return O2_ERR_KEYNOTFOUND; +int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) { + free(rs->results); + free(rs); + return 0; } -// This is a common pattern in sequence queries: what we are doing is -// taking a window of length seqlen over a buffer of length length, -// and placing the sum of the elements in that window in the first -// element of the window: thus replacing all but the last seqlen -// elements in the buffer with the corresponding windowed sum. -void audioDB::sequence_sum(double *buffer, int length, int seqlen) { - double tmp1, tmp2, *ps; - int j, w; - - tmp1 = *buffer; - j = 1; - w = seqlen - 1; - while(w--) { - *buffer += buffer[j++]; - } - ps = buffer + 1; - w = length - seqlen; // +1 - 1 - while(w--) { - tmp2 = *ps; - if(isfinite(tmp1)) { - *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); - } else { - for(int i = 1; i < seqlen; i++) { - *ps += *(ps + i); - } - } - tmp1 = tmp2; - ps++; - } -} - -// In contrast to sequence_sum() above, sequence_sqrt() and -// sequence_average() below are simple mappers across the sequence. 
-void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { - int w = length - seqlen + 1; - while(w--) { - *buffer = sqrt(*buffer); - buffer++; - } -} - -void audioDB::sequence_average(double *buffer, int length, int seqlen) { - int w = length - seqlen + 1; - while(w--) { - *buffer /= seqlen; - buffer++; - } -} - -void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { +static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { unsigned int j, k, l, w; double *dp, *qp, *sp; - const unsigned HOP_SIZE = sequenceHop; - const unsigned wL = sequenceLength; + const unsigned HOP_SIZE = spec->refine.hopsize; + const unsigned wL = spec->qid.sequence_length; for(j = 0; j < numVectors; j++) { // Sum products matrix - D[j] = new double[trackTable[track]]; + D[j] = new double[(*adb->track_lengths)[track]]; assert(D[j]); // Matched filter matrix - DD[j]=new double[trackTable[track]]; + DD[j]=new double[(*adb->track_lengths)[track]]; assert(DD[j]); } // Dot product for(j = 0; j < numVectors; j++) - for(k = 0; k < trackTable[track]; k++){ - qp = query + j * dbH->dim; - sp = data_buffer + k * dbH->dim; + for(k = 0; k < (*adb->track_lengths)[track]; k++){ + qp = query + j * adb->header->dim; + sp = data_buffer + k * adb->header->dim; DD[j][k] = 0.0; // Initialize matched filter array dp = &D[j][k]; // point to correlation cell j,k *dp = 0.0; // initialize correlation cell - l = dbH->dim; // size of vectors + l = adb->header->dim; // size of vectors while(l--) *dp += *qp++ * *sp++; } @@ -201,7 +139,7 @@ for(j = 0; j < numVectors - w; j++) { sp = DD[j]; spd = D[j+w] + w; - k = trackTable[track] - w; + k = (*adb->track_lengths)[track] - w; while(k--) *sp++ += *spd++; } @@ -211,7 +149,7 @@ for(j = 0; j < numVectors - w; j += HOP_SIZE) { sp = DD[j]; spd = D[j+w]+w; - for(k = 0; k < 
trackTable[track] - w; k += HOP_SIZE) { + for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) { *sp += *spd; sp += HOP_SIZE; spd += HOP_SIZE; @@ -221,7 +159,7 @@ } } -void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) { +static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) { if(D != NULL) { for(unsigned int j = 0; j < numVectors; j++) { delete[] D[j]; @@ -234,555 +172,467 @@ } } -void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) { - if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) { +int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) { + uint32_t track_length = (*adb->track_lengths)[track]; + size_t track_size = track_length * sizeof(double) * adb->header->dim; + if (track_size > *data_buffer_size_p) { if(*data_buffer_p) { free(*data_buffer_p); } { - *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim; - void *tmp = malloc(*data_buffer_size_p); + *data_buffer_size_p = track_size; + void *tmp = malloc(track_size); if (tmp == NULL) { - error("error allocating data buffer"); + goto error; } *data_buffer_p = (double *) tmp; } } - CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim); + read_or_goto_error(trkfid, *data_buffer_p, track_size); + return 0; + + error: + return 1; } -// These names deserve some unpicking. The names starting with a "q" -// are pointers to the query, norm and power vectors; the names -// starting with "v" are things that will end up pointing to the -// actual query point's information. 
-- CSR, 2007-12-05 -void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) { - *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); +int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) { + off_t track_offset = (*adb->track_offsets)[track_id]; + if(adb->header->flags & O2_FLAG_LARGE_ADB) { + /* create a reference/insert, then use adb_insert_create_datum() */ + adb_reference_t reference = {0}; + char features[MAXSTR], power[MAXSTR], times[MAXSTR]; + lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); + read_or_goto_error(adb->fd, features, MAXSTR); + reference.features = features; + if(adb->header->flags & O2_FLAG_POWER) { + lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); + read_or_goto_error(adb->fd, power, MAXSTR); + reference.power = power; + } + if(adb->header->flags & O2_FLAG_TIMES) { + lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); + read_or_goto_error(adb->fd, times, MAXSTR); + reference.times = times; + } + return audiodb_insert_create_datum(&reference, d); + } else { + /* initialize from sources of data that we already have */ + d->nvectors = (*adb->track_lengths)[track_id]; + d->dim = adb->header->dim; + d->key = (*adb->keys)[track_id].c_str(); + /* read out stuff from the database tables */ + d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double)); + lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET); + read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double)); + if(adb->header->flags & O2_FLAG_POWER) { + d->power = (double *) malloc(d->nvectors * sizeof(double)); + lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET); + read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double)); + } + if(adb->header->flags & O2_FLAG_TIMES) { + 
d->times = (double *) malloc(2 * d->nvectors * sizeof(double)); + lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET); + read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double)); + } + return 0; + } + error: + audiodb_free_datum(d); + return 1; +} - if(!(dbH->flags & O2_FLAG_L2NORM)) { - error("Database must be L2 normed for sequence query","use -L2NORM"); +int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { + uint32_t nvectors = d->nvectors; + + qpointers->nvectors = nvectors; + + size_t vector_size = nvectors * sizeof(double) * d->dim; + *vector_data = new double[vector_size]; + memcpy(*vector_data, d->data, vector_size); + + qpointers->l2norm_data = new double[vector_size / d->dim]; + audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data); + audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); + audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); + + if(d->power) { + qpointers->power_data = new double[vector_size / d->dim]; + memcpy(qpointers->power_data, d->power, vector_size / d->dim); + audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); + audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); } - if(*nvp < sequenceLength) { - error("Query shorter than requested sequence length", "maybe use -l"); - } - - VERB_LOG(1, "performing norms... 
"); - - *qp = new double[*nvp * dbH->dim]; - memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double)); - *qnp = new double[*nvp]; - unitNorm(*qp, dbH->dim, *nvp, *qnp); - - sequence_sum(*qnp, *nvp, sequenceLength); - sequence_sqrt(*qnp, *nvp, sequenceLength); - - if (usingPower) { - *qpp = new double[*nvp]; - if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { - error("error seeking to data", powerFileName, "lseek"); + if(d->times) { + qpointers->mean_duration = new double[1]; + *qpointers->mean_duration = 0; + for(unsigned int k = 0; k < nvectors; k++) { + *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k]; } - int count = read(powerfd, *qpp, *nvp * sizeof(double)); - if (count == -1) { - error("error reading data", powerFileName, "read"); - } - if ((unsigned) count != *nvp * sizeof(double)) { - error("short read", powerFileName); - } - - sequence_sum(*qpp, *nvp, sequenceLength); - sequence_average(*qpp, *nvp, sequenceLength); + *qpointers->mean_duration /= nvectors; } - if (usingTimes) { - unsigned int k; - *mqdp = 0.0; - double *querydurs = new double[*nvp]; - double *timesdata = new double[*nvp*2]; - insertTimeStamps(*nvp, timesFile, timesdata); - for(k = 0; k < *nvp; k++) { - querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; - *mqdp += querydurs[k]; + *vector = *vector_data; + qpointers->l2norm = qpointers->l2norm_data; + qpointers->power = qpointers->power_data; + return 0; +} + +int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { + adb_datum_t *datum; + adb_datum_t d = {0}; + uint32_t sequence_length; + uint32_t sequence_start; + + datum = spec->qid.datum; + sequence_length = spec->qid.sequence_length; + sequence_start = spec->qid.sequence_start; + + if(datum->data) { + if(datum->dim != adb->header->dim) { + return 1; } - *mqdp /= k; - - VERB_LOG(1, "mean query file duration: %f\n", *mqdp); - - delete [] querydurs; - delete [] 
timesdata; + /* initialize d, and mark that nothing needs freeing later. */ + d = *datum; + datum = &d; + } else if (datum->key) { + uint32_t track_id; + if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) { + return 1; + } + audiodb_track_id_datum(adb, track_id, &d); + } else { + return 1; } - // Defaults, for exhaustive search (!usingQueryPoint) - *vqp = *qp; - *vqnp = *qnp; - *vqpp = *qpp; + /* FIXME: check the overflow logic here */ + if(sequence_start + sequence_length > d.nvectors) { + if(datum != &d) { + audiodb_free_datum(&d); + } + return 1; + } - if(usingQueryPoint) { - if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) { - error("queryPoint >= numVectors-sequenceLength+1 in query"); - } else { - VERB_LOG(1, "query point: %u\n", queryPoint); - *vqp = *qp + queryPoint * dbH->dim; - *vqnp = *qnp + queryPoint; - if (usingPower) { - *vqpp = *qpp + queryPoint; - } - *nvp = sequenceLength; + audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers); + + /* Finally, if applicable, set up the moving qpointers. */ + if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) { + /* the qpointers are already at the start, and so correct. */ + } else { + /* adjust the qpointers to point to the correct place in the sequence */ + *vector = *vector_data + spec->qid.sequence_start * d.dim; + qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; + if(d.power) { + qpointers->power = qpointers->power_data + spec->qid.sequence_start; } + qpointers->nvectors = sequence_length; } + + /* Clean up: free any bits of datum that we have ourselves + * allocated. */ + if(datum != &d) { + audiodb_free_datum(&d); + } + + return 0; } -// Does the same as set_up_query(...) 
but from database features instead of from a file -// Constructs the same outputs as set_up_query -void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) { - if(!trackTable) - error("trackTable not initialized","set_up_query_from_key"); +static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { + uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double)); + uint32_t sequence_length = spec->qid.sequence_length; - if(!(dbH->flags & O2_FLAG_L2NORM)) { - error("Database must be L2 normed for sequence query","use -L2NORM"); + bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); + bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; + double *times_table = NULL; + + + dbpointers->nvectors = nvectors; + dbpointers->l2norm_data = new double[nvectors]; + + double *snpp = dbpointers->l2norm_data, *sppp = 0; + lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); + read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double)); + + if (using_power) { + if (!(adb->header->flags & O2_FLAG_POWER)) { + goto error; + } + dbpointers->power_data = new double[nvectors]; + sppp = dbpointers->power_data; + lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET); + read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double)); } - - if(dbH->flags & O2_FLAG_POWER) - usingPower = true; - - if(dbH->flags & O2_FLAG_TIMES) - usingTimes = true; - *nvp = trackTable[queryIndex]; - if(*nvp < sequenceLength) { - error("Query shorter than requested sequence length", "maybe use -l"); - } - - VERB_LOG(1, "performing norms... 
"); - - // For LARGE_ADB load query features from file - if( dbH->flags & O2_FLAG_LARGE_ADB ){ - if(infid>0) - close(infid); - char* prefixedString = new char[O2_MAXFILESTR]; - char* tmpStr = prefixedString; - strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); - prefix_name(&prefixedString, adb_feature_root); - if(tmpStr!=prefixedString) - delete[] tmpStr; - initInputFile(prefixedString, false); // nommap, file pointer at correct position - size_t allocatedSize = 0; - read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize - // Consistency check on allocated memory and query feature size - if(*nvp*sizeof(double)*dbH->dim != allocatedSize) - error("Query memory allocation failed consitency check","set_up_query_from_key"); - // Allocated and calculate auxillary sequences: l2norm and power - init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp); - } - else{ // Load from self-contained ADB database - // Read query feature vectors from database - *qp = NULL; - lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); - size_t allocatedSize = 0; - read_data(dbfid, queryIndex, qp, &allocatedSize); - // Consistency check on allocated memory and query feature size - if(*nvp*sizeof(double)*dbH->dim != allocatedSize) - error("Query memory allocation failed consitency check","set_up_query_from_key"); - - Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors - // Copy L2 norm partial-sum coefficients - assert(*qnp = new double[*nvp]); - memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double)); - sequence_sum(*qnp, *nvp, sequenceLength); - sequence_sqrt(*qnp, *nvp, sequenceLength); - - if( usingPower ){ - // Copy Power partial-sum coefficients - assert(*qpp = new double[*nvp]); - memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double)); - sequence_sum(*qpp, *nvp, sequenceLength); - 
sequence_average(*qpp, *nvp, sequenceLength); + for(unsigned int i = 0; i < adb->header->numFiles; i++){ + size_t track_length = (*adb->track_lengths)[i]; + if(track_length >= sequence_length) { + audiodb_sequence_sum(snpp, track_length, sequence_length); + audiodb_sequence_sqrt(snpp, track_length, sequence_length); + if (using_power) { + audiodb_sequence_sum(sppp, track_length, sequence_length); + audiodb_sequence_average(sppp, track_length, sequence_length); + } } - - if (usingTimes) { - unsigned int k; - *mqdp = 0.0; - double *querydurs = new double[*nvp]; - double *timesdata = new double[*nvp*2]; - assert(querydurs && timesdata); - memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double)); - for(k = 0; k < *nvp; k++) { - querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; - *mqdp += querydurs[k]; - } - *mqdp /= k; - - VERB_LOG(1, "mean query file duration: %f\n", *mqdp); - - delete [] querydurs; - delete [] timesdata; + snpp += track_length; + if (using_power) { + sppp += track_length; } } - // Defaults, for exhaustive search (!usingQueryPoint) - *vqp = *qp; - *vqnp = *qnp; - *vqpp = *qpp; + if (using_times) { + if(!(adb->header->flags & O2_FLAG_TIMES)) { + goto error; + } - if(usingQueryPoint) { - if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) { - error("queryPoint >= numVectors-sequenceLength+1 in query"); - } else { - VERB_LOG(1, "query point: %u\n", queryPoint); - *vqp = *qp + queryPoint * dbH->dim; - *vqnp = *qnp + queryPoint; - if (usingPower) { - *vqpp = *qpp + queryPoint; + dbpointers->mean_duration = new double[adb->header->numFiles]; + + times_table = (double *) malloc(2 * nvectors * sizeof(double)); + if(!times_table) { + goto error; + } + lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); + read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double)); + for(unsigned int k = 0; k < adb->header->numFiles; k++) { + size_t track_length = (*adb->track_lengths)[k]; + unsigned int j; + 
dbpointers->mean_duration[k] = 0.0; + for(j = 0; j < track_length; j++) { + dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j]; } - *nvp = sequenceLength; + dbpointers->mean_duration[k] /= j; } + + free(times_table); + times_table = NULL; } + + dbpointers->l2norm = dbpointers->l2norm_data; + dbpointers->power = dbpointers->power_data; + return 0; + + error: + if(dbpointers->l2norm_data) { + delete [] dbpointers->l2norm_data; + } + if(dbpointers->power_data) { + delete [] dbpointers->power_data; + } + if(dbpointers->mean_duration) { + delete [] dbpointers->mean_duration; + } + if(times_table) { + free(times_table); + } + return 1; + } +int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) { + adb_qpointers_internal_t dbpointers = {0}; -// FIXME: this is not the right name; we're not actually setting up -// the database, but copying various bits of it out of mmap()ed tables -// in order to reduce seeks. 
-void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { - *dvp = dbH->length / (dbH->dim * sizeof(double)); - *snp = new double[*dvp]; + uint32_t sequence_length = spec->qid.sequence_length; + bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); - double *snpp = *snp, *sppp = 0; - memcpy(*snp, l2normTable, *dvp * sizeof(double)); - - if (usingPower) { - if (!(dbH->flags & O2_FLAG_POWER)) { - error("database not power-enabled", dbName); - } - *spp = new double[*dvp]; - sppp = *spp; - memcpy(*spp, powerTable, *dvp * sizeof(double)); + if(qstate->exact_evaluation_queue->size() == 0) { + return 0; } - for(unsigned int i = 0; i < dbH->numFiles; i++){ - if(trackTable[i] >= sequenceLength) { - sequence_sum(snpp, trackTable[i], sequenceLength); - sequence_sqrt(snpp, trackTable[i], sequenceLength); - - if (usingPower) { - sequence_sum(sppp, trackTable[i], sequenceLength); - sequence_average(sppp, trackTable[i], sequenceLength); + /* We are guaranteed that the order of points is sorted by: + * {trackID, spos, qpos} so we can be relatively efficient in + * initialization of track data. We assume that points usually + * don't overlap, so we will use exhaustive dot product evaluation + * (instead of memoization of partial sums, as in query_loop()). 
+ */ + double dist; + double *dbdata = 0, *dbdata_pointer; + Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range + Uns32T npairs = qstate->exact_evaluation_queue->size(); + while(npairs--) { + PointPair pp = qstate->exact_evaluation_queue->top(); + if(currentTrack != pp.trackID) { + SAFE_DELETE_ARRAY(dbdata); + SAFE_DELETE_ARRAY(dbpointers.l2norm_data); + SAFE_DELETE_ARRAY(dbpointers.power_data); + SAFE_DELETE_ARRAY(dbpointers.mean_duration); + currentTrack = pp.trackID; + adb_datum_t d = {0}; + if(audiodb_track_id_datum(adb, pp.trackID, &d)) { + delete qstate->exact_evaluation_queue; + return 1; + } + if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) { + delete qstate->exact_evaluation_queue; + audiodb_free_datum(&d); + return 1; + } + audiodb_free_datum(&d); + } + Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0; + Uns32T sPos = pp.spos; // index into l2norm table + // Test power thresholds before computing distance + if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && + ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ + // Compute distance + dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); + double qn = qpointers->l2norm[qPos]; + double sn = dbpointers.l2norm[sPos]; + switch(spec->params.distance) { + case ADB_DISTANCE_EUCLIDEAN_NORMED: + dist = 2 - (2/(qn*sn))*dist; + break; + case ADB_DISTANCE_EUCLIDEAN: + dist = qn*qn + sn*sn - 2*dist; + break; + } + if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || + dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { + adb_result_t r; + r.key = (*adb->keys)[pp.trackID].c_str(); + r.dist = dist; + r.qpos = pp.qpos; + r.ipos = pp.spos; + qstate->accumulator->add_point(&r); } } - snpp += trackTable[i]; - if (usingPower) 
{ - sppp += trackTable[i]; - } - } - - if (usingTimes) { - if(!(dbH->flags & O2_FLAG_TIMES)) { - error("query timestamps provided for non-timed database", dbName); - } - - *mddp = new double[dbH->numFiles]; - - for(unsigned int k = 0; k < dbH->numFiles; k++) { - unsigned int j; - (*mddp)[k] = 0.0; - for(j = 0; j < trackTable[k]; j++) { - (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j]; - } - (*mddp)[k] /= j; - } - } - - *vsnp = *snp; - *vspp = *spp; -} - -// query_points() -// -// using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair -// and insert result into the current reporter. -// -// Preconditions: -// A query inFile has been opened with setup_query(...) and query pointers initialized -// The database contains some points -// An exact_evaluation_queue has been allocated and populated -// A reporter has been allocated -// -// Postconditions: -// reporter contains the points and distances that meet the reporter constraints - -void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){ - unsigned int dbVectors; - double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; - double *meanDBdur = 0; - - // check pre-conditions - assert(exact_evaluation_queue&&reporter); - if(!exact_evaluation_queue->size()) // Exit if no points to evaluate - return; - - // Compute database info - // FIXME: we more than likely don't need very much of the database - // so make a new method to build these values per-track or, even better, per-point - if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) - set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); - - VERB_LOG(1, "matching points..."); - - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - - // We are guaranteed that the order of points is sorted by: - // trackID, spos, qpos - // so we can be relatively efficient in initialization of track data. 
- // Here we assume that points don't overlap, so we will use exhaustive dot - // product evaluation instead of memoization of partial sums which is used - // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop() - double dist; - size_t data_buffer_size = 0; - double *data_buffer = 0; - Uns32T trackOffset = 0; - Uns32T trackIndexOffset = 0; - Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range - Uns32T npairs = exact_evaluation_queue->size(); - while(npairs--){ - PointPair pp = exact_evaluation_queue->top(); - // Large ADB track data must be loaded here for sPower - if(dbH->flags & O2_FLAG_LARGE_ADB){ - trackOffset=0; - trackIndexOffset=0; - if(currentTrack!=pp.trackID){ - char* prefixedString = new char[O2_MAXFILESTR]; - char* tmpStr = prefixedString; - // On currentTrack change, allocate and load track data - currentTrack=pp.trackID; - SAFE_DELETE_ARRAY(sNorm); - SAFE_DELETE_ARRAY(sPower); - if(infid>0) - close(infid); - // Open and check dimensions of feature file - strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); - prefix_name((char ** const) &prefixedString, adb_feature_root); - if (prefixedString!=tmpStr) - delete[] tmpStr; - initInputFile(prefixedString, false); // nommap, file pointer at correct position - // Load the feature vector data for current track into data_buffer - read_data(infid, pp.trackID, &data_buffer, &data_buffer_size); - // Load power and calculate power and l2norm sequence sums - init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr); - } - } - else{ - // These offsets are w.r.t. 
the entire database of feature vectors and auxillary variables - trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset - trackIndexOffset=trackOffset/dbH->dim; // num vectors offset - } - Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point - Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table - // Test power thresholds before computing distance - if( ( !usingPower || powers_acceptable(qpPtr[qPos], sPower[sPos])) && - ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){ - // Non-large ADB track data is loaded inside power test for efficiency - if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ - // On currentTrack change, allocate and load track data - currentTrack=pp.trackID; - lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); - read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size); - } - // Compute distance - dist = dot_product_points(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength); - double qn = qnPtr[qPos]; - double sn = sNorm[sPos]; - if(normalizedDistance) - dist = 2 - (2/(qn*sn))*dist; - else - if(no_unit_norming) - dist = qn*qn + sn*sn - 2*dist; - // else - // dist = dist; - if((!radius) || dist <= (radius+O2_DISTANCE_TOLERANCE)) - reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist); - } - exact_evaluation_queue->pop(); + qstate->exact_evaluation_queue->pop(); } // Cleanup - free(data_buffer); - SAFE_DELETE_ARRAY(sNorm); - SAFE_DELETE_ARRAY(sPower); - SAFE_DELETE_ARRAY(meanDBdur); + SAFE_DELETE_ARRAY(dbdata); + SAFE_DELETE_ARRAY(dbpointers.l2norm_data); + SAFE_DELETE_ARRAY(dbpointers.power_data); + SAFE_DELETE_ARRAY(dbpointers.mean_duration); + delete qstate->exact_evaluation_queue; + return 0; } -// A completely unprotected dot-product method -// Caller is responsible for ensuring that memory is within bounds -inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){ - double 
dist = 0.0; - while(L--) - dist += *q++ * *p++; - return dist; -} +int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { + + double *query, *query_data; + adb_qpointers_internal_t qpointers = {0}, dbpointers = {0}; -void audioDB::query_loop(const char* dbName, Uns32T queryIndex) { - - unsigned int numVectors; - double *query, *query_data; - double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; - double meanQdur; + bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); - if( dbH->flags & O2_FLAG_LARGE_ADB ) - error("error: LARGE_ADB requires indexed query"); + if(adb->header->flags & O2_FLAG_LARGE_ADB) { + /* FIXME: actually it would be nice to support this mode of + * operation, but for now... */ + return 1; + } - if(query_from_key) - set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); - else - set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); + if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { + return 1; + } - unsigned int dbVectors; - double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; - double *meanDBdur = 0; + if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { + return 1; + } - set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); - - VERB_LOG(1, "matching tracks..."); - - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - - unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; + unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize; + unsigned wL = spec->qid.sequence_length; double **D = 0; // Differences query and target double **DD = 0; // Matched filter distance - D = new double*[numVectors]; // pre-allocate - DD = new double*[numVectors]; + D = new double*[qpointers.nvectors]; // pre-allocate + DD = new double*[qpointers.nvectors]; - gettimeofday(&tv1, NULL); - 
unsigned processedTracks = 0; off_t trackIndexOffset; - char nextKey[MAXSTR]; // Track loop size_t data_buffer_size = 0; double *data_buffer = 0; - lseek(dbfid, dbH->dataOffset, SEEK_SET); + lseek(adb->fd, adb->header->dataOffset, SEEK_SET); - for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { - - trackOffset = trackOffsetTable[track]; // numDoubles offset - - // get trackID from file if using a control file - if(trackFile) { - trackFile->getline(nextKey,MAXSTR); - if(!trackFile->eof()) { - track = getKeyPos(nextKey); - trackOffset = trackOffsetTable[track]; - lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); - } else { - break; + std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); + for(track = 0; track < adb->header->numFiles; track++) { + unsigned t = track; + + while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) { + track++; + if(track == adb->header->numFiles) { + goto loop_finish; } } + trackOffset = (*adb->track_offsets)[track]; + if(track != t) { + lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET); + } + trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset - // skip identity on query_from_key - if( query_from_key && (track == queryIndex) ) { - if(queryIndex!=dbH->numFiles-1){ - track++; - trackOffset = trackOffsetTable[track]; - lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); - } - else{ - break; - } + if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) { + return 1; } + if(wL <= (*adb->track_lengths)[track]) { // test for short sequences + + audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); - trackIndexOffset=trackOffset/dbH->dim; // numVectors offset - - read_data(dbfid, track, &data_buffer, &data_buffer_size); - if(sequenceLength <= trackTable[track]) { // test for short sequences - - 
VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); - - initialize_arrays(track, numVectors, query, data_buffer, D, DD); - - if(usingTimes) { - VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]); - } - - if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) { - if(usingTimes) { - VERB_LOG(3,"within duration tolerance.\n"); - } + if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || + fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { // Search for minimum distance by shingles (concatenated vectors) - for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { - for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { - double thisDist; - if(normalizedDistance) - thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; - else - if(no_unit_norming) - thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; - else - thisDist = DD[j][k]; - + for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { + for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) { + double thisDist = 0; + double qn = qpointers.l2norm[j]; + double sn = dbpointers.l2norm[trackIndexOffset + k]; + switch(spec->params.distance) { + case ADB_DISTANCE_EUCLIDEAN_NORMED: + thisDist = 2-(2/(qn*sn))*DD[j][k]; + break; + case ADB_DISTANCE_EUCLIDEAN: + thisDist = qn*qn + sn*sn - 2*DD[j][k]; + break; + case ADB_DISTANCE_DOT_PRODUCT: + thisDist = DD[j][k]; + break; + } // Power test - if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) { + if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) { // radius test - if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) { - reporter->add_point(track, usingQueryPoint ? 
queryPoint : j, k, thisDist); + if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || + thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { + adb_result_t r; + r.key = (*adb->keys)[track].c_str(); + r.dist = thisDist; + if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) { + r.qpos = j; + } else { + r.qpos = spec->qid.sequence_start; + } + r.ipos = k; + qstate->accumulator->add_point(&r); } } } } } // Duration match - delete_arrays(track, numVectors, D, DD); + audiodb_delete_arrays(track, qpointers.nvectors, D, DD); } } + loop_finish: + free(data_buffer); - gettimeofday(&tv2,NULL); - VERB_LOG(1,"elapsed time: %ld msec\n", - (tv2.tv_sec*1000 + tv2.tv_usec/1000) - - (tv1.tv_sec*1000 + tv1.tv_usec/1000)) - // Clean up if(query_data) delete[] query_data; - if(qNorm) - delete[] qNorm; - if(sNorm) - delete[] sNorm; - if(qPower) - delete[] qPower; - if(sPower) - delete[] sPower; + if(qpointers.l2norm_data) + delete[] qpointers.l2norm_data; + if(qpointers.power_data) + delete[] qpointers.power_data; + if(qpointers.mean_duration) + delete[] qpointers.mean_duration; + if(dbpointers.power_data) + delete[] dbpointers.power_data; + if(dbpointers.l2norm_data) + delete[] dbpointers.l2norm_data; if(D) delete[] D; if(DD) delete[] DD; - if(meanDBdur) - delete[] meanDBdur; + if(dbpointers.mean_duration) + delete[] dbpointers.mean_duration; + + return 0; } - -// Unit norm block of features -void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ - unsigned d; - double L2, *p; - - VERB_LOG(2, "norming %u vectors...", n); - while(n--) { - p = X; - L2 = 0.0; - d = dim; - while(d--) { - L2 += *p * *p; - p++; - } - if(qNorm) { - *qNorm++=L2; - } - X += dim; - } - VERB_LOG(2, "done.\n"); -} - -