Mercurial > hg > audiodb
diff query.cpp @ 539:06ed85832c3b multiprobeLSH
Optimized the query_loop_points inner loop for memcpy and I/O efficiency. Uses sparse seeks and reads to perform scattered reads across the data set. The current version does not cache the fid between open calls to the same trackID.
author | mas01mc |
---|---|
date | Sat, 07 Feb 2009 01:20:05 +0000 |
parents | ddf763553175 |
children | 52d82badc544 |
line wrap: on
line diff
--- a/query.cpp Fri Feb 06 21:08:35 2009 +0000 +++ b/query.cpp Sat Feb 07 01:20:05 2009 +0000 @@ -198,7 +198,7 @@ return 1; } -int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) { +int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset=0, size_t num_vectors=0) { off_t track_offset = (*adb->track_offsets)[track_id]; if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { /* create a reference/insert, then use adb_insert_create_datum() */ @@ -217,24 +217,27 @@ read_or_goto_error(adb->fd, times, ADB_MAXSTR); reference.times = times; } - return audiodb_insert_create_datum(&reference, d); + return audiodb_insert_create_datum(&reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double)); } else { /* initialize from sources of data that we already have */ - d->nvectors = (*adb->track_lengths)[track_id]; + if(num_vectors) + d->nvectors = num_vectors; + else + d->nvectors = (*adb->track_lengths)[track_id]; d->dim = adb->header->dim; d->key = (*adb->keys)[track_id].c_str(); /* read out stuff from the database tables */ d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double)); - lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET); + lseek(adb->fd, adb->header->dataOffset + track_offset + vector_offset*d->dim*sizeof(double), SEEK_SET); read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double)); if(adb->header->flags & ADB_HEADER_FLAG_POWER) { d->power = (double *) malloc(d->nvectors * sizeof(double)); - lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET); + lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim + vector_offset*sizeof(double), SEEK_SET); read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double)); } if(adb->header->flags & ADB_HEADER_FLAG_TIMES) { d->times = (double *) malloc(2 * d->nvectors * sizeof(double)); - lseek(adb->fd, adb->header->timesTableOffset + 
track_offset / d->dim, SEEK_SET); + lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim + 2 * vector_offset*sizeof(double), SEEK_SET); read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double)); } return 0; @@ -285,34 +288,19 @@ adb_qstate_internal_t *qstate){ uint32_t nvectors = d->nvectors; qpointers->nvectors = nvectors; - std::priority_queue<PointPair, std::vector<PointPair>, greater<PointPair> > ppairs(*qstate->exact_evaluation_queue); - size_t vector_size = nvectors * sizeof(double) * d->dim; - - if(d->power) - qpointers->power_data = new double[vector_size / d->dim]; - - uint32_t seq_len_dbl = sequence_length*sizeof(double); - PointPair pp = ppairs.top(); - uint32_t tid = pp.trackID; - - while( !ppairs.empty() && pp.trackID==tid){ - uint32_t spos = pp.spos; + PointPair pp = (*qstate->exact_evaluation_queue).top(); #ifdef _LSH_DEBUG_ - cout << "tid=" << pp.trackID << " qpos=" << pp.qpos << " spos=" << pp.spos << endl; - cout.flush(); + cout << "tid=" << pp.trackID << " qpos=" << pp.qpos << " spos=" << pp.spos << endl; + cout.flush(); #endif - - if(d->power) { - memcpy(qpointers->power_data+spos, d->power+spos, seq_len_dbl); - audiodb_sequence_sum(qpointers->power_data+spos, sequence_length, sequence_length); - audiodb_sequence_average(qpointers->power_data+spos, sequence_length, sequence_length); - } - ppairs.pop(); - if(!ppairs.empty()) - pp = ppairs.top(); + + if(d->power) { + //memcpy(qpointers->power_data, d->power, seq_len_dbl); + audiodb_sequence_sum(d->power, sequence_length, sequence_length); + audiodb_sequence_average(d->power, sequence_length, sequence_length); } - + if(d->times) { qpointers->mean_duration = new double[1]; *qpointers->mean_duration = 0; @@ -321,11 +309,11 @@ } *qpointers->mean_duration /= nvectors; } - + *vector = d->data; *vector_data = d->data; qpointers->l2norm = 0 ; - qpointers->power = qpointers->power_data; + qpointers->power = d->power; return 0; } @@ -496,7 +484,6 @@ */ double dist; double 
*dbdata = 0, *dbdata_pointer; - Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range Uns32T npairs = qstate->exact_evaluation_queue->size(); #ifdef _LSH_DEBUG_ cout << "Num vector pairs to evaluate: " << npairs << "..." << endl; @@ -505,33 +492,28 @@ adb_datum_t d = {0}; while(npairs--) { PointPair pp = qstate->exact_evaluation_queue->top(); - if(currentTrack != pp.trackID) { - maybe_delete_array(dbpointers.power_data); - maybe_delete_array(dbpointers.mean_duration); - currentTrack = pp.trackID; + maybe_delete_array(dbpointers.mean_duration); + if(audiodb_track_id_datum(adb, pp.trackID, &d, pp.spos, sequence_length)) { + delete qstate->exact_evaluation_queue; + delete qstate->set; + return 1; + } + + if(audiodb_datum_qpointers_partial(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers, qstate)) { + delete qstate->exact_evaluation_queue; + delete qstate->set; audiodb_free_datum(&d); - if(audiodb_track_id_datum(adb, pp.trackID, &d)) { - delete qstate->exact_evaluation_queue; - delete qstate->set; - return 1; - } + return 1; + } - if(audiodb_datum_qpointers_partial(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers, qstate)) { - delete qstate->exact_evaluation_queue; - delete qstate->set; - audiodb_free_datum(&d); - return 1; - } - } Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? 
pp.qpos : 0; - Uns32T sPos = pp.spos; // index into l2norm table // Test power thresholds before computing distance - if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && - ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ + if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[0])) && + ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ // Compute distance - dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); + dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata, adb->header->dim*sequence_length); double qn = audiodb_dot_product(query + qPos*adb->header->dim, query + qPos*adb->header->dim, adb->header->dim*sequence_length); - double sn = audiodb_dot_product(dbdata + sPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); + double sn = audiodb_dot_product(dbdata, dbdata, adb->header->dim*sequence_length); qn = sqrt(qn); sn = sqrt(sn); switch(spec->params.distance) { @@ -553,13 +535,13 @@ } } qstate->exact_evaluation_queue->pop(); + audiodb_free_datum(&d); } // Cleanup - audiodb_free_datum(&d); // maybe_delete_array(dbdata); //maybe_delete_array(dbpointers.l2norm_data); - maybe_delete_array(dbpointers.power_data); + //maybe_delete_array(dbpointers.power_data); maybe_delete_array(dbpointers.mean_duration); delete qstate->exact_evaluation_queue; delete qstate->set;