Mercurial > hg > audiodb
comparison index.cpp @ 458:913a95f06998 api-inversion
Start using the query state structure.
Actually using it means moving it around in the source code a little
bit, thanks to entanglements. It'll all be alright on the night. Now
the accumulator, allowed_keys and exact_evaluation_queue are all part of
the query state, and can therefore be passed around with minimal effort
(and deleted in the appropriate place).
Now a whole bunch of static methods (the callbacks, basically) in
index.cpp can be rewritten as plain old C functions. The callbacks need
both an adb_t and a query state structure to function (the adb_t to get
at things like lsh_n_point_bits and the track->key table; the qstate to
get at the accumulator and allowed_keys list).
Rearrange audioDB::query a little bit, and mark the beginning and the
end of the putative audiodb_query_spec() API function implementation.
author | mas01cr |
---|---|
date | Sun, 28 Dec 2008 18:44:08 +0000 |
parents | 0ef029232213 |
children | fcc6f7c4856b |
comparison
equal
deleted
inserted
replaced
457:823bca1e10f5 | 458:913a95f06998 |
---|---|
12 // 19th August 2008 - added O2_FLAG_LARGE_ADB support | 12 // 19th August 2008 - added O2_FLAG_LARGE_ADB support |
13 | 13 |
14 #include "audioDB.h" | 14 #include "audioDB.h" |
15 #include "audioDB-internals.h" | 15 #include "audioDB-internals.h" |
16 | 16 |
17 /************************* LSH point index to audioDB conversion *****************/ | 17 typedef struct adb_qcallback { |
18 Uns32T audioDB::index_to_trackID(Uns32T lshID, Uns32T nPntBits){ | 18 adb_t *adb; |
19 assert(nPntBits); | 19 adb_qstate_internal_t *qstate; |
20 return lshID>>nPntBits; | 20 } adb_qcallback_t; |
21 } | |
22 | |
23 Uns32T audioDB::index_to_trackPos(Uns32T lshID, Uns32T nPntBits){ | |
24 assert(nPntBits); | |
25 return lshID&((1<<nPntBits)-1); | |
26 } | |
27 | |
28 Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos, Uns32T nPntBits){ | |
29 assert(nPntBits); | |
30 return (trackID << nPntBits) | spos; | |
31 } | |
32 | 21 |
33 /************************* LSH indexing and query initialization *****************/ | 22 /************************* LSH indexing and query initialization *****************/ |
34 | 23 |
35 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){ | 24 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){ |
36 char* indexName = new char[MAXSTR]; | 25 char* indexName = new char[MAXSTR]; |
264 char* mergeIndexName = newIndexName; | 253 char* mergeIndexName = newIndexName; |
265 | 254 |
266 // Get the lsh header info and find how many tracks are inserted already | 255 // Get the lsh header info and find how many tracks are inserted already |
267 lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here | 256 lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here |
268 assert(lsh); | 257 assert(lsh); |
269 Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1; | 258 Uns32T maxs = audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1; |
270 delete lsh; | 259 delete lsh; |
271 lsh = 0; | 260 lsh = 0; |
272 | 261 |
273 // Insert up to lsh_param_b tracks | 262 // Insert up to lsh_param_b tracks |
274 if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){ | 263 if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){ |
481 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){ | 470 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){ |
482 Uns32T collisionCount = 0; | 471 Uns32T collisionCount = 0; |
483 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE; | 472 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE; |
484 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){ | 473 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){ |
485 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold))) | 474 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold))) |
486 collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID, lsh_n_point_bits)); | 475 collisionCount += lsh->insert_point((*vv)[pointID], audiodb_index_from_trackinfo(trackID, pointID, audiodb_lsh_n_point_bits(adb))); |
487 spp+=sequenceHop; | 476 spp+=sequenceHop; |
488 } | 477 } |
489 return collisionCount; | 478 return collisionCount; |
490 } | 479 } |
491 | 480 |
517 VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength); | 506 VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength); |
518 VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); | 507 VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); |
519 VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); | 508 VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); |
520 VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); | 509 VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); |
521 VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); | 510 VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); |
522 VERB_LOG(1,"INDEX: s %d\n", index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)); | 511 VERB_LOG(1,"INDEX: s %d\n", audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))); |
523 VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName); | 512 VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName); |
524 } | 513 } |
525 | 514 |
526 // Check to see if we are loading hash tables into core, and do so if true | 515 // Check to see if we are loading hash tables into core, and do so if true |
527 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ | 516 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ |
534 | 523 |
535 delete[] indexName; | 524 delete[] indexName; |
536 return true; | 525 return true; |
537 } | 526 } |
538 | 527 |
539 // *Static* approximate NN point reporter callback method for lshlib | 528 void audiodb_index_add_point_approximate(void *user_data, Uns32T pointID, Uns32T qpos, float dist) { |
540 void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ | 529 adb_qcallback_t *data = (adb_qcallback_t *) user_data; |
541 assert(instancePtr); // We need an instance for this callback | 530 adb_t *adb = data->adb; |
542 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance | 531 adb_qstate_internal_t *qstate = data->qstate; |
543 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits); | 532 uint32_t nbits = audiodb_lsh_n_point_bits(adb); |
544 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits); | 533 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); |
545 // Skip identity in query_from_key | 534 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); |
546 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) { | 535 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); |
536 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { | |
547 adb_result_t r; | 537 adb_result_t r; |
548 r.key = myself->fileTable + trackID * O2_FILETABLE_ENTRY_SIZE; | 538 r.key = (*adb->keys)[trackID].c_str(); |
549 r.dist = dist; | 539 r.dist = dist; |
550 r.qpos = qpos; | 540 r.qpos = qpos; |
551 r.ipos = spos; | 541 r.ipos = spos; |
552 myself->accumulator->add_point(&r); | 542 qstate->accumulator->add_point(&r); |
553 } | 543 } |
554 } | 544 } |
555 | 545 |
556 // *Static* exact NN point reporter callback method for lshlib | 546 // Maintain a queue of points to pass to query_loop_points() for exact |
557 // Maintain a queue of points to pass to query_points() for exact evaluation | 547 // evaluation |
558 void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ | 548 void audiodb_index_add_point_exact(void *user_data, Uns32T pointID, Uns32T qpos, float dist) { |
559 assert(instancePtr); // We need an instance for this callback | 549 adb_qcallback_t *data = (adb_qcallback_t *) user_data; |
560 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance | 550 adb_t *adb = data->adb; |
561 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits); | 551 adb_qstate_internal_t *qstate = data->qstate; |
562 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits); | 552 uint32_t nbits = audiodb_lsh_n_point_bits(adb); |
563 // Skip identity in query_from_key | 553 uint32_t trackID = audiodb_index_to_track_id(pointID, nbits); |
564 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) | 554 uint32_t spos = audiodb_index_to_track_pos(pointID, nbits); |
565 myself->index_insert_exact_evaluation_queue(trackID, qpos, spos); | 555 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); |
566 } | 556 if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { |
567 | 557 PointPair p(trackID, qpos, spos); |
568 void audioDB::initialize_exact_evalutation_queue(){ | 558 qstate->exact_evaluation_queue->push(p); |
569 if(exact_evaluation_queue) | 559 } |
570 delete exact_evaluation_queue; | |
571 exact_evaluation_queue = new priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >; | |
572 } | |
573 | |
574 void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){ | |
575 PointPair p(trackID, qpos, spos); | |
576 exact_evaluation_queue->push(p); | |
577 } | 560 } |
578 | 561 |
579 // return 0: if index does not exist | 562 // return 0: if index does not exist |
580 // return nqv: if index exists | 563 // return nqv: if index exists |
581 int audioDB::index_query_loop(adb_t *adb, adb_query_spec_t *spec) { | 564 int audioDB::index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { |
582 | 565 |
583 double *query = 0, *query_data = 0; | 566 double *query = 0, *query_data = 0; |
584 adb_qpointers_internal_t qpointers = {0}; | 567 adb_qpointers_internal_t qpointers = {0}; |
585 | 568 |
569 adb_qcallback_t callback_data; | |
570 callback_data.adb = adb; | |
571 callback_data.qstate = qstate; | |
572 | |
586 void (*add_point_func)(void*,Uns32T,Uns32T,float); | 573 void (*add_point_func)(void*,Uns32T,Uns32T,float); |
587 | 574 |
588 sequenceLength = spec->qid.sequence_length; | 575 sequenceLength = spec->qid.sequence_length; |
589 normalizedDistance = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED); | 576 normalizedDistance = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED); |
590 | 577 |
591 // Set the point-reporter callback based on the value of lsh_exact | 578 // Set the point-reporter callback based on the value of lsh_exact |
592 if(lsh_exact){ | 579 if(lsh_exact) { |
593 initialize_exact_evalutation_queue(); | 580 qstate->exact_evaluation_queue = new std::priority_queue<PointPair>; |
594 add_point_func = &index_add_point_exact; | 581 add_point_func = &audiodb_index_add_point_exact; |
595 } | 582 } else { |
596 else | 583 add_point_func = &audiodb_index_add_point_approximate; |
597 add_point_func = &index_add_point_approximate; | 584 } |
598 | 585 |
599 if(!index_init_query(adb->path)) // sets-up LSH index structures for querying | 586 if(!index_init_query(adb->path)) // sets-up LSH index structures for querying |
600 return 0; | 587 return 0; |
601 | 588 |
602 char* database = index_get_name(adb->path, radius, sequenceLength); | 589 char* database = index_get_name(adb->path, radius, sequenceLength); |
619 // Nq contains number of inspected points in query file, | 606 // Nq contains number of inspected points in query file, |
620 // numVecsAboveThreshold is number of points with power >= absolute_threshold | 607 // numVecsAboveThreshold is number of points with power >= absolute_threshold |
621 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation | 608 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation |
622 if(usingQueryPoint && numVecsAboveThreshold){ | 609 if(usingQueryPoint && numVecsAboveThreshold){ |
623 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) | 610 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) |
624 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this); | 611 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data); |
625 else | 612 else |
626 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this); | 613 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data); |
627 } | 614 } |
628 else if(numVecsAboveThreshold) | 615 else if(numVecsAboveThreshold) |
629 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ ) | 616 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ ) |
630 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { | 617 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { |
631 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) { | 618 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) { |
632 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this); | 619 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); |
633 } else { | 620 } else { |
634 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this); | 621 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); |
635 } | 622 } |
636 } | 623 } |
637 | 624 |
638 if(lsh_exact) | 625 if(lsh_exact) |
639 // Perform exact distance computation on point pairs in exact_evaluation_queue | 626 // Perform exact distance computation on point pairs in exact_evaluation_queue |
640 query_loop_points(adb, spec, query, &qpointers); | 627 query_loop_points(adb, spec, qstate, query, &qpointers); |
641 | 628 |
642 // Close the index file | 629 // Close the index file |
643 close(lshfid); | 630 close(lshfid); |
644 | 631 |
645 // Clean up | 632 // Clean up |