diff audioDB.cpp @ 324:c93be2f3a674

Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support. Turn on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also includes a bug fix for LSH indexing, which was incorrectly thresholding large numbers of shingles.
author mas01mc
date Thu, 21 Aug 2008 21:28:33 +0000
parents b671a46873c2
children 7ff56cce3297
line wrap: on
line diff
--- a/audioDB.cpp	Tue Aug 12 14:25:51 2008 +0000
+++ b/audioDB.cpp	Thu Aug 21 21:28:33 2008 +0000
@@ -1,19 +1,21 @@
 #include "audioDB.h"
 
 LSH* SERVER_LSH_INDEX_SINGLETON;
+char* SERVER_ADB_ROOT;
+char* SERVER_ADB_FEATURE_ROOT;
 
 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
 
 bool operator<(const PointPair& a, const PointPair& b){
-  return ( (a.qpos<b.qpos) || 
-	   ((a.qpos==b.qpos) && 
-	    ( (a.trackID<b.trackID)) || ((a.trackID==b.trackID)&&(a.spos<b.spos)) ) );	    
+  return ( (a.trackID<b.trackID) ||
+	   ( (a.trackID==b.trackID) &&  
+	     ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) );
 }
 
 bool operator>(const PointPair& a, const PointPair& b){
-  return ( (a.qpos>b.qpos) || 
-	   ((a.qpos==b.qpos) && 
-	    ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) );
+  return ( (a.trackID>b.trackID) ||
+	   ( (a.trackID==b.trackID) &&  
+	     ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) );
 }
 
 bool operator==(const PointPair& a, const PointPair& b){
@@ -34,6 +36,10 @@
     error("No command found");
   }
 
+  // Perform database prefix substitution
+  if(adb_root)
+    prefix_name((char** const)&dbName, adb_root);
+
   if(O2_ACTION(COM_SERVER))
     startServer();
 
@@ -86,6 +92,9 @@
   try {
     isServer = 1; // FIXME: Hack
     processArgs(argc, argv);
+    // Perform database prefix substitution
+    if(adb_root)
+      prefix_name((char** const)&dbName, adb_root);
     assert(O2_ACTION(COM_QUERY));
     query(dbName, inFile, adbQueryResponse);
   } catch(char *err) {
@@ -99,6 +108,9 @@
   try {
     isServer = 1; // FIXME: Hack
     processArgs(argc, argv);
+    // Perform database prefix substitution
+    if(adb_root)
+      prefix_name((char** const)&dbName, adb_root);
     assert(O2_ACTION(COM_STATUS));
     status(dbName, adbStatusResponse);
   } catch(char *err) {
@@ -125,6 +137,12 @@
     munmap(powerTable, powerTableLength);
   if(l2normTable)
     munmap(l2normTable, l2normTableLength);
+  if(featureFileNameTable)
+    munmap(featureFileNameTable, fileTableLength);
+  if(timesFileNameTable)
+    munmap(timesFileNameTable, fileTableLength);
+  if(powerFileNameTable)
+    munmap(powerFileNameTable, fileTableLength);
   if(trackOffsetTable)
     delete trackOffsetTable;
   if(reporter)
@@ -237,6 +255,20 @@
     relative_threshold = args_info.relative_threshold_arg;
   }
 
+  if (args_info.adb_root_given){
+    adb_root = args_info.adb_root_arg;
+  }
+
+  if (args_info.adb_feature_root_given){
+    adb_feature_root = args_info.adb_feature_root_arg;
+  }
+
+  // perform dbName path prefix SERVER-side substitution
+  if(SERVER_ADB_ROOT && !adb_root)
+    adb_root = SERVER_ADB_ROOT;
+  if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root)
+    adb_feature_root = SERVER_ADB_FEATURE_ROOT;
+  
   if(args_info.SERVER_given){
     command=COM_SERVER;
     port=args_info.SERVER_arg;
@@ -527,15 +559,23 @@
     std::cout << "data dim:" << dbH->dim <<std::endl;
     if(dbH->dim>0){
       std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
-      std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
+      if(dbH->flags & O2_FLAG_LARGE_ADB)
+	std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl;
+      else
+	std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
     }
-    std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
-    std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
-      (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
+    if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
+      std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
+      std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
+	(100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
+    }
     std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
 	      << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
 	      << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
-	      << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) << "]" << endl;    
+	      << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) 
+	      << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB)
+	      << "]" << endl;    
+              
     std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;    
   } else {
     adbStatusResponse->result.numFiles = dbH->numFiles;
@@ -550,7 +590,7 @@
 void audioDB::l2norm(const char* dbName) {
   forWrite = true;
   initTables(dbName, 0);
-  if(dbH->length>0){
+  if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
     /* FIXME: should probably be uint64_t */
     unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
     CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
@@ -563,8 +603,8 @@
 
 void audioDB::power_flag(const char *dbName) {
   forWrite = true;
-  initTables(dbName, 0);
-  if (dbH->length > 0) {
+  initTables(dbName, 0);  
+  if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
     error("cannot turn on power storage for non-empty database", dbName);
   }
   dbH->flags |= O2_FLAG_POWER;
@@ -583,7 +623,7 @@
 
   assert(l2normTable);
 
-  if( !append && (dbH->flags & O2_FLAG_L2NORM) )
+  if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
     error("Database is already L2 normed", "automatic norm on insert is enabled");
 
   VERB_LOG(2, "norming %u vectors...", n);
@@ -624,5 +664,7 @@
 // so it is a good place to set any global state variables
 int main(const unsigned argc, char* const argv[]){
   SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
+  SERVER_ADB_ROOT = 0;            // Server-side database root prefix
+  SERVER_ADB_FEATURE_ROOT = 0;    // Server-side features root prefix
   audioDB(argc, argv);
 }