comparison query.cpp @ 455:93ce12fe2f76 api-inversion

Begin pushing adb_t *adb into index_query_loop and query_loop_points
author mas01cr
date Wed, 24 Dec 2008 10:57:23 +0000
parents f3b0ddc1ead0
children 913a95f06998
comparison
equal deleted inserted replaced
454:f3b0ddc1ead0 455:93ce12fe2f76
220 } 220 }
221 221
222 // Test for index (again) here 222 // Test for index (again) here
223 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){ 223 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
224 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length); 224 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length);
225 index_query_loop(&qspec, dbName, query_from_key_index); 225 index_query_loop(adb, &qspec);
226 } 226 }
227 else{ 227 else{
228 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName); 228 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
229 if(query_loop(adb, &qspec)) { 229 if(query_loop(adb, &qspec)) {
230 error("query_loop failed"); 230 error("query_loop failed");
604 // A reporter has been allocated 604 // A reporter has been allocated
605 // 605 //
606 // Postconditions: 606 // Postconditions:
607 // reporter contains the points and distances that meet the reporter constraints 607 // reporter contains the points and distances that meet the reporter constraints
608 608
609 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) { 609 void audioDB::query_loop_points(adb_t *adb, adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) {
610 adb_qpointers_internal_t dbpointers = {0}; 610 adb_qpointers_internal_t dbpointers = {0};
611 611
612 uint32_t sequence_length = spec->qid.sequence_length; 612 uint32_t sequence_length = spec->qid.sequence_length;
613 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); 613 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
614 614
615 // check pre-conditions 615 if(exact_evaluation_queue->size() == 0) {
616 assert(exact_evaluation_queue&&reporter);
617 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
618 return; 616 return;
619 617 }
620 // Compute database info 618
621 // FIXME: we more than likely don't need very much of the database 619 // Compute database info. FIXME: we more than likely don't need
622 // so make a new method to build these values per-track or, even better, per-point 620 // very much of the database so write a new function to build these
623 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) 621 // values per-track or, even better, per-point
622 if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) {
624 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { 623 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
625 error("failed to set up db"); 624 error("failed to set up dbpointers");
626 } 625 }
626 }
627 627
628 // We are guaranteed that the order of points is sorted by: 628 // We are guaranteed that the order of points is sorted by:
629 // trackID, spos, qpos 629 // {trackID, spos, qpos} so we can be relatively efficient in
630 // so we can be relatively efficient in initialization of track data. 630 // initialization of track data. We assume that points usually
631 // Here we assume that points don't overlap, so we will use exhaustive dot 631 // don't overlap, so we will use exhaustive dot product evaluation
632 // product evaluation instead of memoization of partial sums which is used 632 // instead of memoization of partial sums which is used for
633 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop() 633 // exhaustive brute-force evaluation from smaller databases in
634 // query_loop()
634 double dist; 635 double dist;
635 size_t data_buffer_size = 0; 636 size_t data_buffer_size = 0;
636 double *data_buffer = 0; 637 double *data_buffer = 0;
637 Uns32T trackOffset = 0; 638 Uns32T trackOffset = 0;
638 Uns32T trackIndexOffset = 0; 639 Uns32T trackIndexOffset = 0;
639 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range 640 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
640 Uns32T npairs = exact_evaluation_queue->size(); 641 Uns32T npairs = exact_evaluation_queue->size();
641 while(npairs--){ 642 while(npairs--){
642 PointPair pp = exact_evaluation_queue->top(); 643 PointPair pp = exact_evaluation_queue->top();
643 // Large ADB track data must be loaded here for sPower 644 // Large ADB track data must be loaded here for sPower
644 if(dbH->flags & O2_FLAG_LARGE_ADB){ 645 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
645 trackOffset=0; 646 trackOffset=0;
646 trackIndexOffset=0; 647 trackIndexOffset=0;
647 if(currentTrack!=pp.trackID){ 648 if(currentTrack!=pp.trackID){
648 char* prefixedString = new char[O2_MAXFILESTR]; 649 char* prefixedString = new char[O2_MAXFILESTR];
649 char* tmpStr = prefixedString; 650 char* tmpStr = prefixedString;
667 } 668 }
668 } 669 }
669 else{ 670 else{
670 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables 671 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables
671 trackOffset = (*adb->track_offsets)[pp.trackID]; 672 trackOffset = (*adb->track_offsets)[pp.trackID];
672 trackIndexOffset = trackOffset/(dbH->dim * sizeof(double)); // num vectors offset 673 trackIndexOffset = trackOffset/(adb->header->dim * sizeof(double)); // num vectors offset
673 } 674 }
674 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point 675 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
675 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table 676 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
676 // Test power thresholds before computing distance 677 // Test power thresholds before computing distance
677 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && 678 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
678 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ 679 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
679 // Non-large ADB track data is loaded inside power test for efficiency 680 // Non-large ADB track data is loaded inside power test for efficiency
680 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ 681 if(!(adb->header->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
681 // On currentTrack change, allocate and load track data 682 // On currentTrack change, allocate and load track data
682 currentTrack=pp.trackID; 683 currentTrack=pp.trackID;
683 lseek(dbfid, dbH->dataOffset + trackOffset, SEEK_SET); 684 lseek(dbfid, adb->header->dataOffset + trackOffset, SEEK_SET);
684 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size)) 685 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
685 error("failed to read data"); 686 error("failed to read data");
686 } 687 }
687 // Compute distance 688 // Compute distance
688 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); 689 dist = audiodb_dot_product(query + qPos*adb->header->dim, data_buffer + pp.spos*adb->header->dim, adb->header->dim*sequence_length);
689 double qn = qpointers->l2norm[qPos]; 690 double qn = qpointers->l2norm[qPos];
690 double sn = dbpointers.l2norm[sPos]; 691 double sn = dbpointers.l2norm[sPos];
691 switch(spec->params.distance) { 692 switch(spec->params.distance) {
692 case ADB_DISTANCE_EUCLIDEAN_NORMED: 693 case ADB_DISTANCE_EUCLIDEAN_NORMED:
693 dist = 2 - (2/(qn*sn))*dist; 694 dist = 2 - (2/(qn*sn))*dist;