# HG changeset patch # User mas01cr # Date 1208339983 0 # Node ID 4dcb09f5fe85fe8862194ffd40cdc884edac16e1 # Parent fe922b9d87f86f8b3494c481d0fa6b3a7db0d07a Commit patch deprecating the --size argument, replacing it with --ntracks, --datadim and --datasize. These names are not ideal, but will serve for now. diff -r fe922b9d87f8 -r 4dcb09f5fe85 audioDB.cpp --- a/audioDB.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/audioDB.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -136,10 +136,32 @@ } if(args_info.size_given) { + if(args_info.datasize_given) { + error("both --size and --datasize given", ""); + } + if(args_info.ntracks_given) { + error("both --size and --ntracks given", ""); + } + if(args_info.datadim_given) { + error("both --size and --datadim given", ""); + } if (args_info.size_arg < 50 || args_info.size_arg > 32000) { error("Size out of range", ""); } - size = (off_t) args_info.size_arg * 1000000; + double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE); + /* FIXME: what's the safe way of doing this? 
*/ + datasize = (unsigned int) ceil(datasize * ratio); + ntracks = (unsigned int) ceil(ntracks * ratio); + } else { + if(args_info.datasize_given) { + datasize = args_info.datasize_arg; + } + if(args_info.ntracks_given) { + ntracks = args_info.ntracks_arg; + } + if(args_info.datadim_given) { + datadim = args_info.datadim_arg; + } } if(args_info.radius_given) { diff -r fe922b9d87f8 -r 4dcb09f5fe85 audioDB.h --- a/audioDB.h Sat Apr 12 13:28:30 2008 +0000 +++ b/audioDB.h Wed Apr 16 09:59:43 2008 +0000 @@ -57,10 +57,14 @@ //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size +#define O2_DEFAULT_DATASIZE (1355U) // in MB +#define O2_DEFAULT_NTRACKS (20000U) +#define O2_DEFAULT_DATADIM (9U) + #define O2_MAXFILES (20000U) #define O2_MAXFILESTR (256U) -#define O2_FILETABLESIZE (O2_MAXFILESTR) -#define O2_TRACKTABLESIZE (sizeof(unsigned)) +#define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR) +#define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned)) #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) #define O2_MEANNUMVECTORS (1000U) #define O2_MAXDIM (1000U) @@ -170,7 +174,12 @@ // Flags and parameters unsigned verbosity; // how much do we want to know? - off_t size; // given size (for creation) + + //off_t size; // given size (for creation) + unsigned datasize; // size in MB + unsigned ntracks; + unsigned datadim; + unsigned queryType; // point queries default unsigned pointNN; // how many point NNs ? unsigned trackNN; // how many track NNs ? 
@@ -281,7 +290,9 @@ powerTableLength(0), \ l2normTableLength(0), \ verbosity(1), \ - size(O2_DEFAULTDBSIZE), \ + datasize(O2_DEFAULT_DATASIZE), \ + ntracks(O2_DEFAULT_NTRACKS), \ + datadim(O2_DEFAULT_DATADIM), \ queryType(O2_POINT_QUERY), \ pointNN(O2_DEFAULT_POINTNN), \ trackNN(O2_DEFAULT_TRACKNN), \ diff -r fe922b9d87f8 -r 4dcb09f5fe85 common.cpp --- a/common.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/common.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -115,8 +115,8 @@ powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset; l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; } else { - fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); - trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); + fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE); + trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLE_ENTRY_SIZE); dataBufLength = ALIGN_PAGE_UP(dbH->length); timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim)); powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); diff -r fe922b9d87f8 -r 4dcb09f5fe85 create.cpp --- a/create.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/create.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -24,7 +24,7 @@ dbH = new dbTableHeaderT(); assert(dbH); - unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); + //unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); // Initialize header dbH->magic = O2_MAGIC; @@ -35,17 +35,21 @@ dbH->headerSize = O2_HEADERSIZE; dbH->length = 0; dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); - dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); - dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); - dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - 
maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->dbSize = size; + dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks); + dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks); + + off_t databytes = ((off_t) datasize) * 1024 * 1024; + off_t auxbytes = databytes / datadim; + + dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + databytes); + dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + 2*auxbytes); + dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + auxbytes); + dbH->dbSize = ALIGN_PAGE_UP(dbH->l2normTableOffset + auxbytes); write(dbfid, dbH, O2_HEADERSIZE); // go to the location corresponding to the last byte - if (lseek (dbfid, size - 1, SEEK_SET) == -1) + if (lseek (dbfid, dbH->dbSize - 1, SEEK_SET) == -1) error("lseek error in db file", "", "lseek"); // write a dummy byte at the last location diff -r fe922b9d87f8 -r 4dcb09f5fe85 dump.cpp --- a/dump.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/dump.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -62,7 +62,7 @@ double *data_buffer; size_t data_buffer_size; for(unsigned k = 0; k < dbH->numFiles; k++) { - fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); + fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE); snprintf(fName, 256, "%05d.features", k); if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { error("error creating feature file", fName, "open"); @@ -130,7 +130,7 @@ } pos += trackTable[k]; - std::cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << std::endl; + std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl; } FILE *scriptFile; @@ -142,7 +142,12 @@ \n\ if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ 
-\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000)); +\"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n", + (int) ((dbH->timesTableOffset - dbH->dataOffset) / (1024*1024)), + // fileTable entries (char[256]) are bigger than trackTable + // (int), so the granularity of page aligning is finer. + (int) ((dbH->trackTableOffset - dbH->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE), + (int) ceil(((double) (dbH->timesTableOffset - dbH->dataOffset)) / ((double) (dbH->dbSize - dbH->l2normTableOffset)))); if(dbH->flags & O2_FLAG_L2NORM) { fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); } diff -r fe922b9d87f8 -r 4dcb09f5fe85 gengetopt.in --- a/gengetopt.in Sat Apr 12 13:28:30 2008 +0000 +++ b/gengetopt.in Wed Apr 16 09:59:43 2008 +0000 @@ -5,11 +5,20 @@ option "verbosity" v "level of detail of operational information." int typestr="detail" default="1" optional text "\nDatabase commands are UPPER CASE. Command options are lower case.\n" text "" -section "Database Setup" sectiondesc="All database operations require a database argument." +section "Database Operations" sectiondesc="All database operations require a database argument." option "database" d "database file required by Database commands." string typestr="filename" optional + +section "Database Creation" sectiondesc="Creating a new database file." + option "NEW" N "make a new (initially empty) database." 
dependon="database" optional -option "size" - "size of database file (in MB)" int dependon="NEW" default="2000" optional +option "size" - "size of database file (in MB)" int dependon="NEW" optional hidden +option "datasize" - "size of data table requested (in MB)" int dependon="NEW" default="1355" optional +option "ntracks" - "capacity of database for tracks" int dependon="NEW" default="20000" optional +option "datadim" - "dimensionality of stored data" int dependon="NEW" default="9" optional + +section "Database Maintenance" sectiondesc="Querying, tweaking and dumping databases." + option "STATUS" S "output database information to stdout." dependon="database" optional option "DUMP" D "output all entries: index key size." dependon="database" optional option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional diff -r fe922b9d87f8 -r 4dcb09f5fe85 insert.cpp --- a/insert.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/insert.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -4,8 +4,8 @@ unsigned int fmaxfiles, tmaxfiles; unsigned int maxfiles; - fmaxfiles = fileTableLength / O2_FILETABLESIZE; - tmaxfiles = trackTableLength / O2_TRACKTABLESIZE; + fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE; + tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE; maxfiles = fmaxfiles > tmaxfiles ? 
tmaxfiles : fmaxfiles; return(dbH->numFiles < maxfiles); } @@ -42,7 +42,7 @@ // Linear scan of filenames check for pre-existing feature unsigned alreadyInserted=0; for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){ alreadyInserted=1; break; } @@ -64,7 +64,7 @@ return; } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); + strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)); off_t insertoffset = dbH->length;// Store current state @@ -236,7 +236,7 @@ unsigned alreadyInserted=0; for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)+1)==0){ alreadyInserted=1; break; } @@ -285,7 +285,7 @@ close(thispowerfd); } } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); + strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)); off_t insertoffset = dbH->length;// Store current state diff -r fe922b9d87f8 -r 4dcb09f5fe85 query.cpp --- a/query.cpp Sat Apr 12 13:28:30 2008 +0000 +++ b/query.cpp Wed Apr 16 09:59:43 2008 +0000 @@ -55,7 +55,7 @@ // return ordinal position of key in keyTable unsigned audioDB::getKeyPos(char* key){ for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) return k; error("Key not found",key); return O2_ERR_KEYNOTFOUND; diff -r fe922b9d87f8 -r 4dcb09f5fe85 reporter.h --- a/reporter.h Sat Apr 12 13:28:30 2008 +0000 +++ b/reporter.h Wed Apr 16 09:59:43 2008 +0000 @@ -90,7 +90,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << fileTable + 
r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { @@ -109,7 +109,7 @@ adbQueryResponse->result.Dist[k] = r.dist; adbQueryResponse->result.Qpos[k] = r.qpos; adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); } } } @@ -191,7 +191,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { @@ -210,7 +210,7 @@ adbQueryResponse->result.Dist[k] = r.dist; adbQueryResponse->result.Qpos[k] = r.qpos; adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); } } } @@ -283,7 +283,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.count << std::endl; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; } } else { // FIXME @@ -349,7 +349,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.dist << std::endl; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.dist << std::endl; for(int k=0; k < (int)pointNN; k++){ NNresult rk = point_queues[r.trackID].top(); std::cout << rk.dist << " " << rk.qpos << " " << rk.spos << std::endl; @@ -372,7 +372,7 @@ adbQueryResponse->result.Dist[k] = r.dist; 
adbQueryResponse->result.Qpos[k] = r.qpos; adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); } } // clean up @@ -469,7 +469,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.count << std::endl; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; int qsize=point_queues[r.trackID].size(); for(int k=0; k < qsize; k++){ NNresult rk = point_queues[r.trackID].top();