Mercurial > hg > audiodb
changeset 321:da2272e029b3 large_adb
Added the --adb_feature_root=path option, a path prefix used to locate LARGE_ADB feature files that are stored with relative paths, applied at QUERY time. Also added the convenience option --adb_root=path, a path prefix for the database file named by the -d (database) option.
author | mas01mc |
---|---|
date | Thu, 21 Aug 2008 19:16:21 +0000 |
parents | a995e5ad999a |
children | 634959ef98f2 |
files | audioDB.cpp audioDB.h common.cpp gengetopt.in index.cpp insert.cpp query.cpp |
diffstat | 7 files changed, 87 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.cpp Wed Aug 20 13:50:58 2008 +0000 +++ b/audioDB.cpp Thu Aug 21 19:16:21 2008 +0000 @@ -34,6 +34,10 @@ error("No command found"); } + // perform dbName path prefix subbsitution + if(adb_root) + prefix_name((char** const)&dbName, adb_root); + if(O2_ACTION(COM_SERVER)) startServer(); @@ -243,6 +247,14 @@ relative_threshold = args_info.relative_threshold_arg; } + if (args_info.adb_root_given){ + adb_root = args_info.adb_root_arg; + } + + if (args_info.adb_feature_root_given){ + adb_feature_root = args_info.adb_feature_root_arg; + } + if(args_info.SERVER_given){ command=COM_SERVER; port=args_info.SERVER_arg;
--- a/audioDB.h Wed Aug 20 13:50:58 2008 +0000 +++ b/audioDB.h Thu Aug 21 19:16:21 2008 +0000 @@ -156,8 +156,8 @@ // We will only use this in a 32-bit address space // So map the off_t down to 32-bits first -#define INSERT_FILETABLE_STRING(OFFSET, STR) \ - strncpy((char*)((Uns32T)OFFSET) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR)); +#define INSERT_FILETABLE_STRING(TABLE, STR) \ + strncpy(TABLE + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR)); #define SAFE_DELETE(PTR) delete PTR; PTR=0; #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0; @@ -208,8 +208,10 @@ std::ifstream *timesFile; const char *powerFileName; std::ifstream *powerFile; + const char* adb_root; + const char* adb_feature_root; + int powerfd; - int dbfid; int lshfid; bool forWrite; @@ -306,6 +308,8 @@ void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void insertPowerData(unsigned n, int powerfd, double *powerdata); unsigned getKeyPos(char* key); + void prefix_name(char** const name, const char* prefix); + public: audioDB(const unsigned argc, char* const argv[]); audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse); @@ -394,7 +398,9 @@ timesFile(0), \ powerFileName(0), \ powerFile(0), \ - powerfd(0), \ + adb_root(0), \ + adb_feature_root(0), \ + powerfd(0), \ dbfid(0), \ lshfid(0), \ forWrite(false), \
--- a/common.cpp Wed Aug 20 13:50:58 2008 +0000 +++ b/common.cpp Thu Aug 21 19:16:21 2008 +0000 @@ -230,3 +230,21 @@ initInputFile(inFile); } +// If name is relative path, side effect name with prefix/name +// Do not free original pointer +void audioDB::prefix_name(char** const name, const char* prefix){ + // No prefix if prefix is empty + if(!prefix) + return; + // Allocate new memory, keep old memory + assert(name && *name); + if (strlen(*name) + strlen(prefix) + 1 > O2_MAXFILESTR) + error("error: path prefix + filename too long",prefix); + // Do not prefix absolute path+filename + if(**name=='/') + return; + // OK to prefix relative path+filename + char* prefixedName = (char*) malloc(O2_MAXFILESTR); + sprintf(prefixedName, "%s/%s", prefix, *name); + *name = prefixedName; // side effect new name to old name +}
--- a/gengetopt.in Wed Aug 20 13:50:58 2008 +0000 +++ b/gengetopt.in Thu Aug 21 19:16:21 2008 +0000 @@ -8,6 +8,7 @@ section "Database Operations" sectiondesc="All database operations require a database argument." option "database" d "database file required by Database commands." string typestr="filename" optional +option "adb_root" - "path prefix for database" string typestr="path" dependon="database" optional section "Database Creation" sectiondesc="Creating a new database file." @@ -23,7 +24,7 @@ option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional option "POWER" P "turn on power flag for database." dependon="database" optional -option "INDEX" X "build an index for -d database at -R radius" dependon="database" dependon="radius" optional + section "Database Information" sectiondesc="Information about databases." option "STATUS" S "output database information to stdout." dependon="database" optional @@ -33,7 +34,7 @@ section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n" option "INSERT" I "add feature vectors to an existing database." dependon="features" optional -option "UPDATE" U "replace inserted vectors associated with key with new input vectors." dependon="features" dependon="key" dependon="database" optional hidden +option "adb_feature_root" - "path prefix for feature files, times files and power files" string typestr="path" optional option "features" f "binary series of vectors file {int sz:ieee double[][sz]:eof}." string typestr="filename" dependon="database" optional option "times" t "list of time points (ascii) for feature vectors." string typestr="filename" dependon="features" optional option "power" w "binary power feature file." 
string typestr="filename" dependon="database" optional @@ -62,6 +63,7 @@ section "Locality-sensitive hashing (LSH) parameters" sectiondesc="These parameters control LSH indexing and retrieval\n" +option "INDEX" X "build an index for -d database at -R radius and -l sequenceLength" dependon="database" dependon="radius" optional option "lsh_w" - "width of LSH hash-function bins. " double default="4.0" dependon="INDEX" optional hidden option "lsh_k" - "even number of independent hash functions to employ with LSH" int typestr="size" default="8" dependon="INDEX" optional option "lsh_m" - "number of hash tables is m(m-1)/2" int typestr="size" default="5" dependon="INDEX" optional @@ -79,9 +81,10 @@ section "Web Services" sectiondesc="These commands enable the database process to establish a connection via the internet and operate as separate client and server processes.\n" option "SERVER" s "run as standalone web service on named port." int typestr="port" default="14475" optional +option "load_index" - "make web service with memory-resident hashtables" flag off dependon="radius" optional option "client" c "run as a client using named host service." string typestr="hostname:port" optional -option "load_index" - "make web service with memory-resident hashtables" flag off dependon="radius" optional + text " -Copyright (c) 2007 Michael Casey, Christophe Rhodes +Copyright (c) 2007-2008 Michael Casey, Christophe Rhodes Goldsmiths, University of London"
--- a/index.cpp Wed Aug 20 13:50:58 2008 +0000 +++ b/index.cpp Thu Aug 21 19:16:21 2008 +0000 @@ -57,6 +57,10 @@ return true; } +// If we are a server and have a memory-resident index, check the indexName against the resident index (using get_indexName()) +// If they match, i.e. path+dbName_resident == path+dbName_requested, use +// the memory-resident index. +// Else allocate a new LSH instance and load the index from disk LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){ LSH* gIndx=SERVER_LSH_INDEX_SINGLETON; if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) ) @@ -245,18 +249,24 @@ // Allocate and read the power sequence if(trackTable[trackID]>=sequenceLength){ - + + char* prefixedString = new char[O2_MAXFILESTR]; + char* tmpStr = prefixedString; // Open and check dimensions of power file - powerfd = open(powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O_RDONLY); + strncpy(prefixedString, powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); + prefix_name((char ** const)&prefixedString, adb_feature_root); + if(prefixedString!=tmpStr) + delete[] tmpStr; + powerfd = open(prefixedString, O_RDONLY); if (powerfd < 0) { - error("failed to open power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE); + error("failed to open power file", prefixedString); } if (fstat(powerfd, &statbuf) < 0) { - error("fstat error finding size of power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, "fstat"); + error("fstat error finding size of power file", prefixedString, "fstat"); } if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] ) - error("Dimension mismatch: numPowers != numVectors", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE); + error("Dimension mismatch: numPowers != numVectors", prefixedString); *sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values assert(*sPowerp); @@ -292,8 +302,14 @@ int trackfd = dbfid; for(trackID = start_track 
; trackID < end_track ; trackID++ ){ if( dbH->flags & O2_FLAG_LARGE_ADB ){ + char* prefixedString = new char[O2_MAXFILESTR]; + char* tmpStr = prefixedString; // Open and check dimensions of feature file - initInputFile(featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position + strncpy(prefixedString, featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); + prefix_name((char ** const) &prefixedString, adb_feature_root); + if(prefixedString!=tmpStr) + delete[] tmpStr; + initInputFile(prefixedString, false); // nommap, file pointer at correct position trackfd = infid; } read_data(trackfd, trackID, &fvp, &nfv); // over-writes fvp and nfv
--- a/insert.cpp Wed Aug 20 13:50:58 2008 +0000 +++ b/insert.cpp Thu Aug 21 19:16:21 2008 +0000 @@ -492,8 +492,10 @@ } } // CLEAN UP - munmap(indata,statbuf.st_size); - close(infid); + if(indata) + munmap(indata,statbuf.st_size); + if(infid>0) + close(infid); } while(!filesIn->eof()); VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
--- a/query.cpp Wed Aug 20 13:50:58 2008 +0000 +++ b/query.cpp Thu Aug 21 19:16:21 2008 +0000 @@ -345,7 +345,13 @@ if( dbH->flags & O2_FLAG_LARGE_ADB ){ if(infid>0) close(infid); - initInputFile(featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position + char* prefixedString = new char[O2_MAXFILESTR]; + char* tmpStr = prefixedString; + strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); + prefix_name(&prefixedString, adb_feature_root); + if(tmpStr!=prefixedString) + delete[] tmpStr; + initInputFile(prefixedString, false); // nommap, file pointer at correct position size_t allocatedSize = 0; read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize // Consistency check on allocated memory and query feature size @@ -531,6 +537,8 @@ trackOffset=0; trackIndexOffset=0; if(currentTrack!=pp.trackID){ + char* prefixedString = new char[O2_MAXFILESTR]; + char* tmpStr = prefixedString; // On currentTrack change, allocate and load track data currentTrack=pp.trackID; SAFE_DELETE_ARRAY(sNorm); @@ -538,7 +546,11 @@ if(infid>0) close(infid); // Open and check dimensions of feature file - initInputFile(featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position + strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); + prefix_name((char ** const) &prefixedString, adb_feature_root); + if (prefixedString!=tmpStr) + delete[] tmpStr; + initInputFile(prefixedString, false); // nommap, file pointer at correct position // Load the feature vector data for current track into data_buffer read_data(infid, pp.trackID, &data_buffer, &data_buffer_size); // Load power and calculate power and l2norm sequence sums