comparison index.cpp @ 458:913a95f06998 api-inversion

Start using the query state structure. Actually using it means moving the structure around in the source code a little bit, thanks to entanglements. It'll all be alright on the night. The accumulator, allowed_keys and exact_evaluation_queue are now all part of the query state, and can therefore be passed around with minimal effort (and deleted in the appropriate place). As a result, a whole bunch of static methods (the callbacks, basically) in index.cpp can be rewritten as plain old C functions. The callbacks need both an adb_t and a query state structure to function (the adb_t to get at things like lsh_n_point_bits and the track->key table; the qstate to get at the accumulator and the allowed_keys list). Rearrange audioDB::query a little bit, and mark the beginning and the end of the putative audiodb_query_spec() API function implementation.
author mas01cr
date Sun, 28 Dec 2008 18:44:08 +0000
parents 0ef029232213
children fcc6f7c4856b
comparison
equal deleted inserted replaced
457:823bca1e10f5 458:913a95f06998
12 // 19th August 2008 - added O2_FLAG_LARGE_ADB support 12 // 19th August 2008 - added O2_FLAG_LARGE_ADB support
13 13
14 #include "audioDB.h" 14 #include "audioDB.h"
15 #include "audioDB-internals.h" 15 #include "audioDB-internals.h"
16 16
17 /************************* LSH point index to audioDB conversion *****************/ 17 typedef struct adb_qcallback {
18 Uns32T audioDB::index_to_trackID(Uns32T lshID, Uns32T nPntBits){ 18 adb_t *adb;
19 assert(nPntBits); 19 adb_qstate_internal_t *qstate;
20 return lshID>>nPntBits; 20 } adb_qcallback_t;
21 }
22
23 Uns32T audioDB::index_to_trackPos(Uns32T lshID, Uns32T nPntBits){
24 assert(nPntBits);
25 return lshID&((1<<nPntBits)-1);
26 }
27
28 Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos, Uns32T nPntBits){
29 assert(nPntBits);
30 return (trackID << nPntBits) | spos;
31 }
32 21
33 /************************* LSH indexing and query initialization *****************/ 22 /************************* LSH indexing and query initialization *****************/
34 23
35 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){ 24 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){
36 char* indexName = new char[MAXSTR]; 25 char* indexName = new char[MAXSTR];
264 char* mergeIndexName = newIndexName; 253 char* mergeIndexName = newIndexName;
265 254
266 // Get the lsh header info and find how many tracks are inserted already 255 // Get the lsh header info and find how many tracks are inserted already
267 lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here 256 lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here
268 assert(lsh); 257 assert(lsh);
269 Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1; 258 Uns32T maxs = audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1;
270 delete lsh; 259 delete lsh;
271 lsh = 0; 260 lsh = 0;
272 261
273 // Insert up to lsh_param_b tracks 262 // Insert up to lsh_param_b tracks
274 if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){ 263 if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){
481 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){ 470 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
482 Uns32T collisionCount = 0; 471 Uns32T collisionCount = 0;
483 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE; 472 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
484 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){ 473 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){
485 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold))) 474 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold)))
486 collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID, lsh_n_point_bits)); 475 collisionCount += lsh->insert_point((*vv)[pointID], audiodb_index_from_trackinfo(trackID, pointID, audiodb_lsh_n_point_bits(adb)));
487 spp+=sequenceHop; 476 spp+=sequenceHop;
488 } 477 }
489 return collisionCount; 478 return collisionCount;
490 } 479 }
491 480
517 VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength); 506 VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength);
518 VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); 507 VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
519 VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); 508 VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
520 VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); 509 VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
521 VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); 510 VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
522 VERB_LOG(1,"INDEX: s %d\n", index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)); 511 VERB_LOG(1,"INDEX: s %d\n", audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb)));
523 VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName); 512 VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName);
524 } 513 }
525 514
526 // Check to see if we are loading hash tables into core, and do so if true 515 // Check to see if we are loading hash tables into core, and do so if true
527 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ 516 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
534 523
535 delete[] indexName; 524 delete[] indexName;
536 return true; 525 return true;
537 } 526 }
538 527
539 // *Static* approximate NN point reporter callback method for lshlib 528 void audiodb_index_add_point_approximate(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
540 void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ 529 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
541 assert(instancePtr); // We need an instance for this callback 530 adb_t *adb = data->adb;
542 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance 531 adb_qstate_internal_t *qstate = data->qstate;
543 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits); 532 uint32_t nbits = audiodb_lsh_n_point_bits(adb);
544 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits); 533 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits);
545 // Skip identity in query_from_key 534 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits);
546 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) { 535 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
536 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
547 adb_result_t r; 537 adb_result_t r;
548 r.key = myself->fileTable + trackID * O2_FILETABLE_ENTRY_SIZE; 538 r.key = (*adb->keys)[trackID].c_str();
549 r.dist = dist; 539 r.dist = dist;
550 r.qpos = qpos; 540 r.qpos = qpos;
551 r.ipos = spos; 541 r.ipos = spos;
552 myself->accumulator->add_point(&r); 542 qstate->accumulator->add_point(&r);
553 } 543 }
554 } 544 }
555 545
556 // *Static* exact NN point reporter callback method for lshlib 546 // Maintain a queue of points to pass to query_loop_points() for exact
557 // Maintain a queue of points to pass to query_points() for exact evaluation 547 // evaluation
558 void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ 548 void audiodb_index_add_point_exact(void *user_data, Uns32T pointID, Uns32T qpos, float dist) {
559 assert(instancePtr); // We need an instance for this callback 549 adb_qcallback_t *data = (adb_qcallback_t *) user_data;
560 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance 550 adb_t *adb = data->adb;
561 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits); 551 adb_qstate_internal_t *qstate = data->qstate;
562 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits); 552 uint32_t nbits = audiodb_lsh_n_point_bits(adb);
563 // Skip identity in query_from_key 553 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits);
564 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) 554 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits);
565 myself->index_insert_exact_evaluation_queue(trackID, qpos, spos); 555 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
566 } 556 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) {
567 557 PointPair p(trackID, qpos, spos);
568 void audioDB::initialize_exact_evalutation_queue(){ 558 qstate->exact_evaluation_queue->push(p);
569 if(exact_evaluation_queue) 559 }
570 delete exact_evaluation_queue;
571 exact_evaluation_queue = new priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >;
572 }
573
574 void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){
575 PointPair p(trackID, qpos, spos);
576 exact_evaluation_queue->push(p);
577 } 560 }
578 561
579 // return 0: if index does not exist 562 // return 0: if index does not exist
580 // return nqv: if index exists 563 // return nqv: if index exists
581 int audioDB::index_query_loop(adb_t *adb, adb_query_spec_t *spec) { 564 int audioDB::index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
582 565
583 double *query = 0, *query_data = 0; 566 double *query = 0, *query_data = 0;
584 adb_qpointers_internal_t qpointers = {0}; 567 adb_qpointers_internal_t qpointers = {0};
585 568
569 adb_qcallback_t callback_data;
570 callback_data.adb = adb;
571 callback_data.qstate = qstate;
572
586 void (*add_point_func)(void*,Uns32T,Uns32T,float); 573 void (*add_point_func)(void*,Uns32T,Uns32T,float);
587 574
588 sequenceLength = spec->qid.sequence_length; 575 sequenceLength = spec->qid.sequence_length;
589 normalizedDistance = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED); 576 normalizedDistance = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
590 577
591 // Set the point-reporter callback based on the value of lsh_exact 578 // Set the point-reporter callback based on the value of lsh_exact
592 if(lsh_exact){ 579 if(lsh_exact) {
593 initialize_exact_evalutation_queue(); 580 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>;
594 add_point_func = &index_add_point_exact; 581 add_point_func = &audiodb_index_add_point_exact;
595 } 582 } else {
596 else 583 add_point_func = &audiodb_index_add_point_approximate;
597 add_point_func = &index_add_point_approximate; 584 }
598 585
599 if(!index_init_query(adb->path)) // sets-up LSH index structures for querying 586 if(!index_init_query(adb->path)) // sets-up LSH index structures for querying
600 return 0; 587 return 0;
601 588
602 char* database = index_get_name(adb->path, radius, sequenceLength); 589 char* database = index_get_name(adb->path, radius, sequenceLength);
619 // Nq contains number of inspected points in query file, 606 // Nq contains number of inspected points in query file,
620 // numVecsAboveThreshold is number of points with power >= absolute_threshold 607 // numVecsAboveThreshold is number of points with power >= absolute_threshold
621 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation 608 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
622 if(usingQueryPoint && numVecsAboveThreshold){ 609 if(usingQueryPoint && numVecsAboveThreshold){
623 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) 610 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
624 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this); 611 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data);
625 else 612 else
626 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this); 613 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data);
627 } 614 }
628 else if(numVecsAboveThreshold) 615 else if(numVecsAboveThreshold)
629 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ ) 616 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ )
630 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { 617 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
631 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) { 618 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
632 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this); 619 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
633 } else { 620 } else {
634 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this); 621 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
635 } 622 }
636 } 623 }
637 624
638 if(lsh_exact) 625 if(lsh_exact)
639 // Perform exact distance computation on point pairs in exact_evaluation_queue 626 // Perform exact distance computation on point pairs in exact_evaluation_queue
640 query_loop_points(adb, spec, query, &qpointers); 627 query_loop_points(adb, spec, qstate, query, &qpointers);
641 628
642 // Close the index file 629 // Close the index file
643 close(lshfid); 630 close(lshfid);
644 631
645 // Clean up 632 // Clean up