comparison audioDB.cpp @ 324:c93be2f3a674

Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support. Turn on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also includes a bug fix in LSH indexing, which was incorrectly thresholding large numbers of shingles.
author mas01mc
date Thu, 21 Aug 2008 21:28:33 +0000
parents b671a46873c2
children 7ff56cce3297
comparison
equal deleted inserted replaced
315:d2c56d4f841e 324:c93be2f3a674
1 #include "audioDB.h" 1 #include "audioDB.h"
2 2
3 LSH* SERVER_LSH_INDEX_SINGLETON; 3 LSH* SERVER_LSH_INDEX_SINGLETON;
4 char* SERVER_ADB_ROOT;
5 char* SERVER_ADB_FEATURE_ROOT;
4 6
5 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){}; 7 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
6 8
7 bool operator<(const PointPair& a, const PointPair& b){ 9 bool operator<(const PointPair& a, const PointPair& b){
8 return ( (a.qpos<b.qpos) || 10 return ( (a.trackID<b.trackID) ||
9 ((a.qpos==b.qpos) && 11 ( (a.trackID==b.trackID) &&
10 ( (a.trackID<b.trackID)) || ((a.trackID==b.trackID)&&(a.spos<b.spos)) ) ); 12 ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) );
11 } 13 }
12 14
13 bool operator>(const PointPair& a, const PointPair& b){ 15 bool operator>(const PointPair& a, const PointPair& b){
14 return ( (a.qpos>b.qpos) || 16 return ( (a.trackID>b.trackID) ||
15 ((a.qpos==b.qpos) && 17 ( (a.trackID==b.trackID) &&
16 ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) ); 18 ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) );
17 } 19 }
18 20
19 bool operator==(const PointPair& a, const PointPair& b){ 21 bool operator==(const PointPair& a, const PointPair& b){
20 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) ); 22 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) );
21 } 23 }
32 printf("%s\n", gengetopt_args_info_help[2]); 34 printf("%s\n", gengetopt_args_info_help[2]);
33 printf("%s\n", gengetopt_args_info_help[0]); 35 printf("%s\n", gengetopt_args_info_help[0]);
34 error("No command found"); 36 error("No command found");
35 } 37 }
36 38
39 // Perform database prefix substitution
40 if(adb_root)
41 prefix_name((char** const)&dbName, adb_root);
42
37 if(O2_ACTION(COM_SERVER)) 43 if(O2_ACTION(COM_SERVER))
38 startServer(); 44 startServer();
39 45
40 else if(O2_ACTION(COM_CREATE)) 46 else if(O2_ACTION(COM_CREATE))
41 create(dbName); 47 create(dbName);
84 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS 90 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
85 { 91 {
86 try { 92 try {
87 isServer = 1; // FIXME: Hack 93 isServer = 1; // FIXME: Hack
88 processArgs(argc, argv); 94 processArgs(argc, argv);
95 // Perform database prefix substitution
96 if(adb_root)
97 prefix_name((char** const)&dbName, adb_root);
89 assert(O2_ACTION(COM_QUERY)); 98 assert(O2_ACTION(COM_QUERY));
90 query(dbName, inFile, adbQueryResponse); 99 query(dbName, inFile, adbQueryResponse);
91 } catch(char *err) { 100 } catch(char *err) {
92 cleanup(); 101 cleanup();
93 throw(err); 102 throw(err);
97 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS 106 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
98 { 107 {
99 try { 108 try {
100 isServer = 1; // FIXME: Hack 109 isServer = 1; // FIXME: Hack
101 processArgs(argc, argv); 110 processArgs(argc, argv);
111 // Perform database prefix substitution
112 if(adb_root)
113 prefix_name((char** const)&dbName, adb_root);
102 assert(O2_ACTION(COM_STATUS)); 114 assert(O2_ACTION(COM_STATUS));
103 status(dbName, adbStatusResponse); 115 status(dbName, adbStatusResponse);
104 } catch(char *err) { 116 } catch(char *err) {
105 cleanup(); 117 cleanup();
106 throw(err); 118 throw(err);
123 munmap(timesTable, timesTableLength); 135 munmap(timesTable, timesTableLength);
124 if(powerTable) 136 if(powerTable)
125 munmap(powerTable, powerTableLength); 137 munmap(powerTable, powerTableLength);
126 if(l2normTable) 138 if(l2normTable)
127 munmap(l2normTable, l2normTableLength); 139 munmap(l2normTable, l2normTableLength);
140 if(featureFileNameTable)
141 munmap(featureFileNameTable, fileTableLength);
142 if(timesFileNameTable)
143 munmap(timesFileNameTable, fileTableLength);
144 if(powerFileNameTable)
145 munmap(powerFileNameTable, fileTableLength);
128 if(trackOffsetTable) 146 if(trackOffsetTable)
129 delete trackOffsetTable; 147 delete trackOffsetTable;
130 if(reporter) 148 if(reporter)
131 delete reporter; 149 delete reporter;
132 if(exact_evaluation_queue) 150 if(exact_evaluation_queue)
235 if (args_info.relative_threshold_given) { 253 if (args_info.relative_threshold_given) {
236 use_relative_threshold = true; 254 use_relative_threshold = true;
237 relative_threshold = args_info.relative_threshold_arg; 255 relative_threshold = args_info.relative_threshold_arg;
238 } 256 }
239 257
258 if (args_info.adb_root_given){
259 adb_root = args_info.adb_root_arg;
260 }
261
262 if (args_info.adb_feature_root_given){
263 adb_feature_root = args_info.adb_feature_root_arg;
264 }
265
266 // perform dbName path prefix SERVER-side substitution
267 if(SERVER_ADB_ROOT && !adb_root)
268 adb_root = SERVER_ADB_ROOT;
269 if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root)
270 adb_feature_root = SERVER_ADB_FEATURE_ROOT;
271
240 if(args_info.SERVER_given){ 272 if(args_info.SERVER_given){
241 command=COM_SERVER; 273 command=COM_SERVER;
242 port=args_info.SERVER_arg; 274 port=args_info.SERVER_arg;
243 if(port<100 || port > 100000) 275 if(port<100 || port > 100000)
244 error("port out of range"); 276 error("port out of range");
525 // Update Header information 557 // Update Header information
526 std::cout << "num files:" << dbH->numFiles << std::endl; 558 std::cout << "num files:" << dbH->numFiles << std::endl;
527 std::cout << "data dim:" << dbH->dim <<std::endl; 559 std::cout << "data dim:" << dbH->dim <<std::endl;
528 if(dbH->dim>0){ 560 if(dbH->dim>0){
529 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; 561 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
530 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; 562 if(dbH->flags & O2_FLAG_LARGE_ADB)
531 } 563 std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl;
532 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; 564 else
533 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << 565 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
534 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; 566 }
567 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
568 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
569 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
570 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
571 }
535 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM) 572 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
536 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX) 573 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
537 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER) 574 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
538 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) << "]" << endl; 575 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES)
576 << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB)
577 << "]" << endl;
578
539 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; 579 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
540 } else { 580 } else {
541 adbStatusResponse->result.numFiles = dbH->numFiles; 581 adbStatusResponse->result.numFiles = dbH->numFiles;
542 adbStatusResponse->result.dim = dbH->dim; 582 adbStatusResponse->result.dim = dbH->dim;
543 adbStatusResponse->result.length = dbH->length; 583 adbStatusResponse->result.length = dbH->length;
548 } 588 }
549 589
550 void audioDB::l2norm(const char* dbName) { 590 void audioDB::l2norm(const char* dbName) {
551 forWrite = true; 591 forWrite = true;
552 initTables(dbName, 0); 592 initTables(dbName, 0);
553 if(dbH->length>0){ 593 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
554 /* FIXME: should probably be uint64_t */ 594 /* FIXME: should probably be uint64_t */
555 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); 595 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
556 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); 596 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
557 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append 597 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
558 } 598 }
561 memcpy (db, dbH, O2_HEADERSIZE); 601 memcpy (db, dbH, O2_HEADERSIZE);
562 } 602 }
563 603
564 void audioDB::power_flag(const char *dbName) { 604 void audioDB::power_flag(const char *dbName) {
565 forWrite = true; 605 forWrite = true;
566 initTables(dbName, 0); 606 initTables(dbName, 0);
567 if (dbH->length > 0) { 607 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
568 error("cannot turn on power storage for non-empty database", dbName); 608 error("cannot turn on power storage for non-empty database", dbName);
569 } 609 }
570 dbH->flags |= O2_FLAG_POWER; 610 dbH->flags |= O2_FLAG_POWER;
571 memcpy(db, dbH, O2_HEADERSIZE); 611 memcpy(db, dbH, O2_HEADERSIZE);
572 } 612 }
581 double *p; 621 double *p;
582 unsigned nn = n; 622 unsigned nn = n;
583 623
584 assert(l2normTable); 624 assert(l2normTable);
585 625
586 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) 626 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
587 error("Database is already L2 normed", "automatic norm on insert is enabled"); 627 error("Database is already L2 normed", "automatic norm on insert is enabled");
588 628
589 VERB_LOG(2, "norming %u vectors...", n); 629 VERB_LOG(2, "norming %u vectors...", n);
590 630
591 double* l2buf = new double[n]; 631 double* l2buf = new double[n];
622 662
623 // This entry point is visited once per instance 663 // This entry point is visited once per instance
624 // so it is a good place to set any global state variables 664 // so it is a good place to set any global state variables
625 int main(const unsigned argc, char* const argv[]){ 665 int main(const unsigned argc, char* const argv[]){
626 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables 666 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
667 SERVER_ADB_ROOT = 0; // Server-side database root prefix
668 SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix
627 audioDB(argc, argv); 669 audioDB(argc, argv);
628 } 670 }