Mercurial > hg > audiodb
diff audioDB.cpp @ 324:c93be2f3a674
Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support, which is turned on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also includes a fix for a bug in LSH indexing that was incorrectly thresholding large numbers of shingles.
author | mas01mc |
---|---|
date | Thu, 21 Aug 2008 21:28:33 +0000 |
parents | b671a46873c2 |
children | 7ff56cce3297 |
line wrap: on
line diff
--- a/audioDB.cpp Tue Aug 12 14:25:51 2008 +0000 +++ b/audioDB.cpp Thu Aug 21 21:28:33 2008 +0000 @@ -1,19 +1,21 @@ #include "audioDB.h" LSH* SERVER_LSH_INDEX_SINGLETON; +char* SERVER_ADB_ROOT; +char* SERVER_ADB_FEATURE_ROOT; PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){}; bool operator<(const PointPair& a, const PointPair& b){ - return ( (a.qpos<b.qpos) || - ((a.qpos==b.qpos) && - ( (a.trackID<b.trackID)) || ((a.trackID==b.trackID)&&(a.spos<b.spos)) ) ); + return ( (a.trackID<b.trackID) || + ( (a.trackID==b.trackID) && + ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) ); } bool operator>(const PointPair& a, const PointPair& b){ - return ( (a.qpos>b.qpos) || - ((a.qpos==b.qpos) && - ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) ); + return ( (a.trackID>b.trackID) || + ( (a.trackID==b.trackID) && + ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) ); } bool operator==(const PointPair& a, const PointPair& b){ @@ -34,6 +36,10 @@ error("No command found"); } + // Perform database prefix substitution + if(adb_root) + prefix_name((char** const)&dbName, adb_root); + if(O2_ACTION(COM_SERVER)) startServer(); @@ -86,6 +92,9 @@ try { isServer = 1; // FIXME: Hack processArgs(argc, argv); + // Perform database prefix substitution + if(adb_root) + prefix_name((char** const)&dbName, adb_root); assert(O2_ACTION(COM_QUERY)); query(dbName, inFile, adbQueryResponse); } catch(char *err) { @@ -99,6 +108,9 @@ try { isServer = 1; // FIXME: Hack processArgs(argc, argv); + // Perform database prefix substitution + if(adb_root) + prefix_name((char** const)&dbName, adb_root); assert(O2_ACTION(COM_STATUS)); status(dbName, adbStatusResponse); } catch(char *err) { @@ -125,6 +137,12 @@ munmap(powerTable, powerTableLength); if(l2normTable) munmap(l2normTable, l2normTableLength); + if(featureFileNameTable) + munmap(featureFileNameTable, fileTableLength); + if(timesFileNameTable) + 
munmap(timesFileNameTable, fileTableLength); + if(powerFileNameTable) + munmap(powerFileNameTable, fileTableLength); if(trackOffsetTable) delete trackOffsetTable; if(reporter) @@ -237,6 +255,20 @@ relative_threshold = args_info.relative_threshold_arg; } + if (args_info.adb_root_given){ + adb_root = args_info.adb_root_arg; + } + + if (args_info.adb_feature_root_given){ + adb_feature_root = args_info.adb_feature_root_arg; + } + + // perform dbName path prefix SERVER-side subsitution + if(SERVER_ADB_ROOT && !adb_root) + adb_root = SERVER_ADB_ROOT; + if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root) + adb_feature_root = SERVER_ADB_FEATURE_ROOT; + if(args_info.SERVER_given){ command=COM_SERVER; port=args_info.SERVER_arg; @@ -527,15 +559,23 @@ std::cout << "data dim:" << dbH->dim <<std::endl; if(dbH->dim>0){ std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; - std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; + if(dbH->flags & O2_FLAG_LARGE_ADB) + std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl; + else + std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; } - std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; - std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << - (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; + if( ! 
(dbH->flags & O2_FLAG_LARGE_ADB) ){ + std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; + std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << + (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; + } std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM) << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX) << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER) - << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) << "]" << endl; + << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) + << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB) + << "]" << endl; + std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; } else { adbStatusResponse->result.numFiles = dbH->numFiles; @@ -550,7 +590,7 @@ void audioDB::l2norm(const char* dbName) { forWrite = true; initTables(dbName, 0); - if(dbH->length>0){ + if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){ /* FIXME: should probably be uint64_t */ unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); @@ -563,8 +603,8 @@ void audioDB::power_flag(const char *dbName) { forWrite = true; - initTables(dbName, 0); - if (dbH->length > 0) { + initTables(dbName, 0); + if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){ error("cannot turn on power storage for non-empty database", dbName); } dbH->flags |= O2_FLAG_POWER; @@ -583,7 +623,7 @@ assert(l2normTable); - if( !append && (dbH->flags & O2_FLAG_L2NORM) ) + if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) ) error("Database is already L2 normed", "automatic norm on insert is enabled"); VERB_LOG(2, "norming %u vectors...", n); @@ -624,5 +664,7 @@ // so it is a good place 
to set any global state variables int main(const unsigned argc, char* const argv[]){ SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables + SERVER_ADB_ROOT = 0; // Server-side database root prefix + SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix audioDB(argc, argv); }