Mercurial > hg > audiodb
comparison query.cpp @ 462:f689510baaf4 api-inversion
Simplify audioDB::query_loop_points.
Using the new functions audiodb_track_id_datum() and
audiodb_datum_qpointers(), much of the body of the method disappears.
Of course, we've probably introduced some inefficiencies and extra
memory copies, but I'm fairly sure that this method is going to be
dominated by disk I/O time anyway, so it doesn't matter.
author | mas01cr |
---|---|
date | Tue, 30 Dec 2008 15:38:55 +0000 |
parents | 2b8cfec91ed7 |
children | 35bb388d0eac |
comparison legend: equal | deleted | inserted | replaced
461:2b8cfec91ed7 | 462:f689510baaf4 |
---|---|
650 | 650 |
651 if(qstate->exact_evaluation_queue->size() == 0) { | 651 if(qstate->exact_evaluation_queue->size() == 0) { |
652 return; | 652 return; |
653 } | 653 } |
654 | 654 |
655 // Compute database info. FIXME: we more than likely don't need | 655 /* We are guaranteed that the order of points is sorted by: |
656 // very much of the database so write a new function to build these | 656 * {trackID, spos, qpos} so we can be relatively efficient in |
657 // values per-track or, even better, per-point | 657 * initialization of track data. We assume that points usually |
658 if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) { | 658 * don't overlap, so we will use exhaustive dot product evaluation |
659 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { | 659 * (instead of memoization of partial sums, as in query_loop()). */ |
660 error("failed to set up dbpointers"); | |
661 } | |
662 } | |
663 | |
664 // We are guaranteed that the order of points is sorted by: | |
665 // {trackID, spos, qpos} so we can be relatively efficient in | |
666 // initialization of track data. We assume that points usually | |
667 // don't overlap, so we will use exhaustive dot product evaluation | |
668 // instead of memoization of partial sums which is used for | |
669 // exhaustive brute-force evaluation from smaller databases in | |
670 // query_loop() | |
671 double dist; | 660 double dist; |
672 size_t data_buffer_size = 0; | 661 double *dbdata = 0, *dbdata_pointer; |
673 double *data_buffer = 0; | 662 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range |
674 Uns32T trackOffset = 0; | |
675 Uns32T trackIndexOffset = 0; | |
676 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range | |
677 Uns32T npairs = qstate->exact_evaluation_queue->size(); | 663 Uns32T npairs = qstate->exact_evaluation_queue->size(); |
678 while(npairs--){ | 664 while(npairs--) { |
679 PointPair pp = qstate->exact_evaluation_queue->top(); | 665 PointPair pp = qstate->exact_evaluation_queue->top(); |
680 // Large ADB track data must be loaded here for sPower | 666 if(currentTrack != pp.trackID) { |
681 if(adb->header->flags & O2_FLAG_LARGE_ADB) { | 667 SAFE_DELETE_ARRAY(dbdata); |
682 trackOffset=0; | 668 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
683 trackIndexOffset=0; | 669 SAFE_DELETE_ARRAY(dbpointers.power_data); |
684 if(currentTrack!=pp.trackID){ | 670 SAFE_DELETE_ARRAY(dbpointers.mean_duration); |
685 char* prefixedString = new char[O2_MAXFILESTR]; | 671 currentTrack = pp.trackID; |
686 char* tmpStr = prefixedString; | 672 adb_datum_t d = {0}; |
687 // On currentTrack change, allocate and load track data | 673 if(audiodb_track_id_datum(adb, pp.trackID, &d)) { |
688 currentTrack=pp.trackID; | 674 error("failed to get datum"); |
689 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); | 675 } |
690 SAFE_DELETE_ARRAY(dbpointers.power_data); | 676 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) { |
691 if(infid>0) | 677 audiodb_free_datum(&d); |
692 close(infid); | 678 error("failed to get dbpointers"); |
693 // Open and check dimensions of feature file | 679 } |
694 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); | 680 audiodb_free_datum(&d); |
695 prefix_name((char ** const) &prefixedString, adb_feature_root); | 681 } |
696 if (prefixedString!=tmpStr) | |
697 delete[] tmpStr; | |
698 initInputFile(prefixedString); | |
699 // Load the feature vector data for current track into data_buffer | |
700 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size)) | |
701 error("failed to read data"); | |
702 // Load power and calculate power and l2norm sequence sums | |
703 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power); | |
704 } | |
705 } | |
706 else{ | |
707 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables | |
708 trackOffset = (*adb->track_offsets)[pp.trackID]; | |
709 trackIndexOffset = trackOffset/(adb->header->dim * sizeof(double)); // num vectors offset | |
710 } | |
711 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point | 682 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point |
712 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table | 683 Uns32T sPos = pp.spos; // index into l2norm table |
713 // Test power thresholds before computing distance | 684 // Test power thresholds before computing distance |
714 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && | 685 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && |
715 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ | 686 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ |
716 // Non-large ADB track data is loaded inside power test for efficiency | |
717 if(!(adb->header->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ | |
718 // On currentTrack change, allocate and load track data | |
719 currentTrack=pp.trackID; | |
720 lseek(dbfid, adb->header->dataOffset + trackOffset, SEEK_SET); | |
721 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size)) | |
722 error("failed to read data"); | |
723 } | |
724 // Compute distance | 687 // Compute distance |
725 dist = audiodb_dot_product(query + qPos*adb->header->dim, data_buffer + pp.spos*adb->header->dim, adb->header->dim*sequence_length); | 688 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); |
726 double qn = qpointers->l2norm[qPos]; | 689 double qn = qpointers->l2norm[qPos]; |
727 double sn = dbpointers.l2norm[sPos]; | 690 double sn = dbpointers.l2norm[sPos]; |
728 switch(spec->params.distance) { | 691 switch(spec->params.distance) { |
729 case ADB_DISTANCE_EUCLIDEAN_NORMED: | 692 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
730 dist = 2 - (2/(qn*sn))*dist; | 693 dist = 2 - (2/(qn*sn))*dist; |
743 } | 706 } |
744 } | 707 } |
745 qstate->exact_evaluation_queue->pop(); | 708 qstate->exact_evaluation_queue->pop(); |
746 } | 709 } |
747 // Cleanup | 710 // Cleanup |
711 SAFE_DELETE_ARRAY(dbdata); | |
748 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); | 712 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
749 SAFE_DELETE_ARRAY(dbpointers.power_data); | 713 SAFE_DELETE_ARRAY(dbpointers.power_data); |
750 SAFE_DELETE_ARRAY(dbpointers.mean_duration); | 714 SAFE_DELETE_ARRAY(dbpointers.mean_duration); |
751 delete qstate->exact_evaluation_queue; | 715 delete qstate->exact_evaluation_queue; |
752 } | 716 } |