comparison query.cpp @ 462:f689510baaf4 api-inversion

Simplify audioDB::query_loop_points. Using the new functions audiodb_track_id_datum() and audiodb_datum_qpointers(), much of the body of the method disappears. Of course, we've probably introduced some inefficiencies and extra memory copies, but I'm fairly sure that this method is going to be dominated by disk i/o time anyway, so it doesn't matter.
author mas01cr
date Tue, 30 Dec 2008 15:38:55 +0000
parents 2b8cfec91ed7
children 35bb388d0eac
comparison
equal deleted inserted replaced
461:2b8cfec91ed7 462:f689510baaf4
650 650
651 if(qstate->exact_evaluation_queue->size() == 0) { 651 if(qstate->exact_evaluation_queue->size() == 0) {
652 return; 652 return;
653 } 653 }
654 654
655 // Compute database info. FIXME: we more than likely don't need 655 /* We are guaranteed that the order of points is sorted by:
656 // very much of the database so write a new function to build these 656 * {trackID, spos, qpos} so we can be relatively efficient in
657 // values per-track or, even better, per-point 657 * initialization of track data. We assume that points usually
658 if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) { 658 * don't overlap, so we will use exhaustive dot product evaluation
659 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { 659 * (instead of memoization of partial sums, as in query_loop()). */
660 error("failed to set up dbpointers");
661 }
662 }
663
664 // We are guaranteed that the order of points is sorted by:
665 // {trackID, spos, qpos} so we can be relatively efficient in
666 // initialization of track data. We assume that points usually
667 // don't overlap, so we will use exhaustive dot product evaluation
668 // instead of memoization of partial sums which is used for
669 // exhaustive brute-force evaluation from smaller databases in
670 // query_loop()
671 double dist; 660 double dist;
672 size_t data_buffer_size = 0; 661 double *dbdata = 0, *dbdata_pointer;
673 double *data_buffer = 0; 662 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
674 Uns32T trackOffset = 0;
675 Uns32T trackIndexOffset = 0;
676 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
677 Uns32T npairs = qstate->exact_evaluation_queue->size(); 663 Uns32T npairs = qstate->exact_evaluation_queue->size();
678 while(npairs--){ 664 while(npairs--) {
679 PointPair pp = qstate->exact_evaluation_queue->top(); 665 PointPair pp = qstate->exact_evaluation_queue->top();
680 // Large ADB track data must be loaded here for sPower 666 if(currentTrack != pp.trackID) {
681 if(adb->header->flags & O2_FLAG_LARGE_ADB) { 667 SAFE_DELETE_ARRAY(dbdata);
682 trackOffset=0; 668 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
683 trackIndexOffset=0; 669 SAFE_DELETE_ARRAY(dbpointers.power_data);
684 if(currentTrack!=pp.trackID){ 670 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
685 char* prefixedString = new char[O2_MAXFILESTR]; 671 currentTrack = pp.trackID;
686 char* tmpStr = prefixedString; 672 adb_datum_t d = {0};
687 // On currentTrack change, allocate and load track data 673 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
688 currentTrack=pp.trackID; 674 error("failed to get datum");
689 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); 675 }
690 SAFE_DELETE_ARRAY(dbpointers.power_data); 676 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
691 if(infid>0) 677 audiodb_free_datum(&d);
692 close(infid); 678 error("failed to get dbpointers");
693 // Open and check dimensions of feature file 679 }
694 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); 680 audiodb_free_datum(&d);
695 prefix_name((char ** const) &prefixedString, adb_feature_root); 681 }
696 if (prefixedString!=tmpStr)
697 delete[] tmpStr;
698 initInputFile(prefixedString);
699 // Load the feature vector data for current track into data_buffer
700 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
701 error("failed to read data");
702 // Load power and calculate power and l2norm sequence sums
703 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
704 }
705 }
706 else{
707 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables
708 trackOffset = (*adb->track_offsets)[pp.trackID];
709 trackIndexOffset = trackOffset/(adb->header->dim * sizeof(double)); // num vectors offset
710 }
711 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point 682 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
712 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table 683 Uns32T sPos = pp.spos; // index into l2norm table
713 // Test power thresholds before computing distance 684 // Test power thresholds before computing distance
714 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && 685 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
715 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ 686 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
716 // Non-large ADB track data is loaded inside power test for efficiency
717 if(!(adb->header->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
718 // On currentTrack change, allocate and load track data
719 currentTrack=pp.trackID;
720 lseek(dbfid, adb->header->dataOffset + trackOffset, SEEK_SET);
721 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
722 error("failed to read data");
723 }
724 // Compute distance 687 // Compute distance
725 dist = audiodb_dot_product(query + qPos*adb->header->dim, data_buffer + pp.spos*adb->header->dim, adb->header->dim*sequence_length); 688 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
726 double qn = qpointers->l2norm[qPos]; 689 double qn = qpointers->l2norm[qPos];
727 double sn = dbpointers.l2norm[sPos]; 690 double sn = dbpointers.l2norm[sPos];
728 switch(spec->params.distance) { 691 switch(spec->params.distance) {
729 case ADB_DISTANCE_EUCLIDEAN_NORMED: 692 case ADB_DISTANCE_EUCLIDEAN_NORMED:
730 dist = 2 - (2/(qn*sn))*dist; 693 dist = 2 - (2/(qn*sn))*dist;
743 } 706 }
744 } 707 }
745 qstate->exact_evaluation_queue->pop(); 708 qstate->exact_evaluation_queue->pop();
746 } 709 }
747 // Cleanup 710 // Cleanup
711 SAFE_DELETE_ARRAY(dbdata);
748 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); 712 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
749 SAFE_DELETE_ARRAY(dbpointers.power_data); 713 SAFE_DELETE_ARRAY(dbpointers.power_data);
750 SAFE_DELETE_ARRAY(dbpointers.mean_duration); 714 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
751 delete qstate->exact_evaluation_queue; 715 delete qstate->exact_evaluation_queue;
752 } 716 }