Mercurial > hg > audiodb
comparison audioDB.cpp @ 324:c93be2f3a674
Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support. Turn on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also fixes a bug in LSH indexing that was incorrectly thresholding large numbers of shingles.
author | mas01mc |
---|---|
date | Thu, 21 Aug 2008 21:28:33 +0000 |
parents | b671a46873c2 |
children | 7ff56cce3297 |
comparison
equal
deleted
inserted
replaced
315:d2c56d4f841e | 324:c93be2f3a674 |
---|---|
1 #include "audioDB.h" | 1 #include "audioDB.h" |
2 | 2 |
3 LSH* SERVER_LSH_INDEX_SINGLETON; | 3 LSH* SERVER_LSH_INDEX_SINGLETON; |
4 char* SERVER_ADB_ROOT; | |
5 char* SERVER_ADB_FEATURE_ROOT; | |
4 | 6 |
5 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){}; | 7 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){}; |
6 | 8 |
7 bool operator<(const PointPair& a, const PointPair& b){ | 9 bool operator<(const PointPair& a, const PointPair& b){ |
8 return ( (a.qpos<b.qpos) || | 10 return ( (a.trackID<b.trackID) || |
9 ((a.qpos==b.qpos) && | 11 ( (a.trackID==b.trackID) && |
10 ( (a.trackID<b.trackID)) || ((a.trackID==b.trackID)&&(a.spos<b.spos)) ) ); | 12 ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) ); |
11 } | 13 } |
12 | 14 |
13 bool operator>(const PointPair& a, const PointPair& b){ | 15 bool operator>(const PointPair& a, const PointPair& b){ |
14 return ( (a.qpos>b.qpos) || | 16 return ( (a.trackID>b.trackID) || |
15 ((a.qpos==b.qpos) && | 17 ( (a.trackID==b.trackID) && |
16 ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) ); | 18 ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) ); |
17 } | 19 } |
18 | 20 |
19 bool operator==(const PointPair& a, const PointPair& b){ | 21 bool operator==(const PointPair& a, const PointPair& b){ |
20 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) ); | 22 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) ); |
21 } | 23 } |
32 printf("%s\n", gengetopt_args_info_help[2]); | 34 printf("%s\n", gengetopt_args_info_help[2]); |
33 printf("%s\n", gengetopt_args_info_help[0]); | 35 printf("%s\n", gengetopt_args_info_help[0]); |
34 error("No command found"); | 36 error("No command found"); |
35 } | 37 } |
36 | 38 |
39 // Perform database prefix substitution | |
40 if(adb_root) | |
41 prefix_name((char** const)&dbName, adb_root); | |
42 | |
37 if(O2_ACTION(COM_SERVER)) | 43 if(O2_ACTION(COM_SERVER)) |
38 startServer(); | 44 startServer(); |
39 | 45 |
40 else if(O2_ACTION(COM_CREATE)) | 46 else if(O2_ACTION(COM_CREATE)) |
41 create(dbName); | 47 create(dbName); |
84 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS | 90 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS |
85 { | 91 { |
86 try { | 92 try { |
87 isServer = 1; // FIXME: Hack | 93 isServer = 1; // FIXME: Hack |
88 processArgs(argc, argv); | 94 processArgs(argc, argv); |
95 // Perform database prefix substitution | |
96 if(adb_root) | |
97 prefix_name((char** const)&dbName, adb_root); | |
89 assert(O2_ACTION(COM_QUERY)); | 98 assert(O2_ACTION(COM_QUERY)); |
90 query(dbName, inFile, adbQueryResponse); | 99 query(dbName, inFile, adbQueryResponse); |
91 } catch(char *err) { | 100 } catch(char *err) { |
92 cleanup(); | 101 cleanup(); |
93 throw(err); | 102 throw(err); |
97 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS | 106 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS |
98 { | 107 { |
99 try { | 108 try { |
100 isServer = 1; // FIXME: Hack | 109 isServer = 1; // FIXME: Hack |
101 processArgs(argc, argv); | 110 processArgs(argc, argv); |
111 // Perform database prefix substitution | |
112 if(adb_root) | |
113 prefix_name((char** const)&dbName, adb_root); | |
102 assert(O2_ACTION(COM_STATUS)); | 114 assert(O2_ACTION(COM_STATUS)); |
103 status(dbName, adbStatusResponse); | 115 status(dbName, adbStatusResponse); |
104 } catch(char *err) { | 116 } catch(char *err) { |
105 cleanup(); | 117 cleanup(); |
106 throw(err); | 118 throw(err); |
123 munmap(timesTable, timesTableLength); | 135 munmap(timesTable, timesTableLength); |
124 if(powerTable) | 136 if(powerTable) |
125 munmap(powerTable, powerTableLength); | 137 munmap(powerTable, powerTableLength); |
126 if(l2normTable) | 138 if(l2normTable) |
127 munmap(l2normTable, l2normTableLength); | 139 munmap(l2normTable, l2normTableLength); |
140 if(featureFileNameTable) | |
141 munmap(featureFileNameTable, fileTableLength); | |
142 if(timesFileNameTable) | |
143 munmap(timesFileNameTable, fileTableLength); | |
144 if(powerFileNameTable) | |
145 munmap(powerFileNameTable, fileTableLength); | |
128 if(trackOffsetTable) | 146 if(trackOffsetTable) |
129 delete trackOffsetTable; | 147 delete trackOffsetTable; |
130 if(reporter) | 148 if(reporter) |
131 delete reporter; | 149 delete reporter; |
132 if(exact_evaluation_queue) | 150 if(exact_evaluation_queue) |
235 if (args_info.relative_threshold_given) { | 253 if (args_info.relative_threshold_given) { |
236 use_relative_threshold = true; | 254 use_relative_threshold = true; |
237 relative_threshold = args_info.relative_threshold_arg; | 255 relative_threshold = args_info.relative_threshold_arg; |
238 } | 256 } |
239 | 257 |
258 if (args_info.adb_root_given){ | |
259 adb_root = args_info.adb_root_arg; | |
260 } | |
261 | |
262 if (args_info.adb_feature_root_given){ | |
263 adb_feature_root = args_info.adb_feature_root_arg; | |
264 } | |
265 | |
266 // perform dbName path prefix SERVER-side substitution | |
267 if(SERVER_ADB_ROOT && !adb_root) | |
268 adb_root = SERVER_ADB_ROOT; | |
269 if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root) | |
270 adb_feature_root = SERVER_ADB_FEATURE_ROOT; | |
271 | |
240 if(args_info.SERVER_given){ | 272 if(args_info.SERVER_given){ |
241 command=COM_SERVER; | 273 command=COM_SERVER; |
242 port=args_info.SERVER_arg; | 274 port=args_info.SERVER_arg; |
243 if(port<100 || port > 100000) | 275 if(port<100 || port > 100000) |
244 error("port out of range"); | 276 error("port out of range"); |
525 // Update Header information | 557 // Update Header information |
526 std::cout << "num files:" << dbH->numFiles << std::endl; | 558 std::cout << "num files:" << dbH->numFiles << std::endl; |
527 std::cout << "data dim:" << dbH->dim <<std::endl; | 559 std::cout << "data dim:" << dbH->dim <<std::endl; |
528 if(dbH->dim>0){ | 560 if(dbH->dim>0){ |
529 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; | 561 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; |
530 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; | 562 if(dbH->flags & O2_FLAG_LARGE_ADB) |
531 } | 563 std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl; |
532 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; | 564 else |
533 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << | 565 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; |
534 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; | 566 } |
567 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){ | |
568 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; | |
569 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << | |
570 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; | |
571 } | |
535 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM) | 572 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM) |
536 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX) | 573 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX) |
537 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER) | 574 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER) |
538 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) << "]" << endl; | 575 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) |
576 << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB) | |
577 << "]" << endl; | |
578 | |
539 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; | 579 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; |
540 } else { | 580 } else { |
541 adbStatusResponse->result.numFiles = dbH->numFiles; | 581 adbStatusResponse->result.numFiles = dbH->numFiles; |
542 adbStatusResponse->result.dim = dbH->dim; | 582 adbStatusResponse->result.dim = dbH->dim; |
543 adbStatusResponse->result.length = dbH->length; | 583 adbStatusResponse->result.length = dbH->length; |
548 } | 588 } |
549 | 589 |
550 void audioDB::l2norm(const char* dbName) { | 590 void audioDB::l2norm(const char* dbName) { |
551 forWrite = true; | 591 forWrite = true; |
552 initTables(dbName, 0); | 592 initTables(dbName, 0); |
553 if(dbH->length>0){ | 593 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){ |
554 /* FIXME: should probably be uint64_t */ | 594 /* FIXME: should probably be uint64_t */ |
555 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); | 595 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); |
556 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); | 596 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); |
557 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append | 597 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append |
558 } | 598 } |
561 memcpy (db, dbH, O2_HEADERSIZE); | 601 memcpy (db, dbH, O2_HEADERSIZE); |
562 } | 602 } |
563 | 603 |
564 void audioDB::power_flag(const char *dbName) { | 604 void audioDB::power_flag(const char *dbName) { |
565 forWrite = true; | 605 forWrite = true; |
566 initTables(dbName, 0); | 606 initTables(dbName, 0); |
567 if (dbH->length > 0) { | 607 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){ |
568 error("cannot turn on power storage for non-empty database", dbName); | 608 error("cannot turn on power storage for non-empty database", dbName); |
569 } | 609 } |
570 dbH->flags |= O2_FLAG_POWER; | 610 dbH->flags |= O2_FLAG_POWER; |
571 memcpy(db, dbH, O2_HEADERSIZE); | 611 memcpy(db, dbH, O2_HEADERSIZE); |
572 } | 612 } |
581 double *p; | 621 double *p; |
582 unsigned nn = n; | 622 unsigned nn = n; |
583 | 623 |
584 assert(l2normTable); | 624 assert(l2normTable); |
585 | 625 |
586 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) | 626 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) ) |
587 error("Database is already L2 normed", "automatic norm on insert is enabled"); | 627 error("Database is already L2 normed", "automatic norm on insert is enabled"); |
588 | 628 |
589 VERB_LOG(2, "norming %u vectors...", n); | 629 VERB_LOG(2, "norming %u vectors...", n); |
590 | 630 |
591 double* l2buf = new double[n]; | 631 double* l2buf = new double[n]; |
622 | 662 |
623 // This entry point is visited once per instance | 663 // This entry point is visited once per instance |
624 // so it is a good place to set any global state variables | 664 // so it is a good place to set any global state variables |
625 int main(const unsigned argc, char* const argv[]){ | 665 int main(const unsigned argc, char* const argv[]){ |
626 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables | 666 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables |
667 SERVER_ADB_ROOT = 0; // Server-side database root prefix | |
668 SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix | |
627 audioDB(argc, argv); | 669 audioDB(argc, argv); |
628 } | 670 } |