changeset 129:f7eba8eb272c

Variable size databases, part 2: add a new --size argument for database creation and use it for the dbSize field. (Note: on 32-bit platforms the full range of this option is only usable when compiling with -D_FILE_OFFSET_BITS=64 or similar; otherwise off_t, and hence the offset passed to the lseek() call, must be strictly less than 2^31.)
author mas01cr
date Fri, 19 Oct 2007 14:41:54 +0000
parents f789aa32382f
children 63ca70f2bf37
files audioDB.cpp audioDB.h gengetopt.in
diffstat 3 files changed, 15 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Fri Oct 19 13:51:53 2007 +0000
+++ b/audioDB.cpp	Fri Oct 19 14:41:54 2007 +0000
@@ -152,6 +152,13 @@
     }
   }
 
+  if(args_info.size_given) {
+    if (args_info.size_arg < 250 || args_info.size_arg > 4000) {
+      error("Size out of range", "");
+    }
+    size = args_info.size_arg * 1000000;
+  }
+
   if(args_info.radius_given){
     radius=args_info.radius_arg;
     if(radius<=0 || radius>1000000000){
@@ -372,7 +379,7 @@
   get_lock(dbfid, 1);
 
   // go to the location corresponding to the last byte
-  if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
+  if (lseek (dbfid, size - 1, SEEK_SET) == -1)
     error("lseek error in db file", "", "lseek");
 
   // write a dummy byte at the last location
@@ -383,7 +390,7 @@
   if(verbosity) {
     cerr << "header size:" << O2_HEADERSIZE << endl;
   }
-  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
+  if ((db = (char*) mmap(0, size, PROT_READ | PROT_WRITE,
 			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
     error("mmap error for creating database", "", "mmap");
   
@@ -400,9 +407,9 @@
   dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8);
   dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8);
   dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8);
-  dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8);
+  dbH->l2normTableOffset = ALIGN_DOWN(size - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8);
   dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8);
-  dbH->dbSize = O2_DEFAULTDBSIZE;
+  dbH->dbSize = size;
 
   memcpy (db, dbH, O2_HEADERSIZE);
   if(verbosity) {
--- a/audioDB.h	Fri Oct 19 13:51:53 2007 +0000
+++ b/audioDB.h	Fri Oct 19 14:41:54 2007 +0000
@@ -141,6 +141,7 @@
 
   // Flags and parameters
   unsigned verbosity;   // how much do we want to know?
+  unsigned size; // given size (for creation)
   unsigned queryType; // point queries default
   unsigned pointNN;   // how many point NNs ?
   unsigned trackNN;   // how many track NNs ?
@@ -220,6 +221,7 @@
   qNorm(0), \
   timesTable(0), \
   verbosity(1), \
+  size(O2_DEFAULTDBSIZE), \
   queryType(O2_POINT_QUERY), \
   pointNN(O2_DEFAULT_POINTNN), \
   trackNN(O2_DEFAULT_TRACKNN), \
--- a/gengetopt.in	Fri Oct 19 13:51:53 2007 +0000
+++ b/gengetopt.in	Fri Oct 19 14:41:54 2007 +0000
@@ -1,37 +1,3 @@
-# POSIX-COMPLIANT COMMAND LINE ARGUMENTS:
-#
-#     -v --verbosity
-#
-#     -d --database dbName
-#     -N --NEW
-#     -S --STATUS
-#     -D --DUMP
-#     -L --L2NORM
-#
-#     -f --features features.bin 
-#     -k --key key
-#     -t --timePoints.txt
-#     -I --INSERT 
-#     -U --UPDATE
-#
-#     -F --featureList featureList.txt 
-#     -K --keyList keyList.txt
-#     -T --timePointsList.txt
-#     -B --BATCHINSERT dbName
-#
-#     -Q --QUERY {point|track|sequence} 
-#     -p --qpoint - ordinal position of query vector in feature input file
-#     -n --pointnn n  - number of nearest neigbour points per track
-#     -r --resultlength n - number of tracks (nearest neighbours) to return
-#     -l --seqlen len - length of sequence
-#     -h --seqhop hop - hop size of sequence
-#     -R --radius - radius-based search, return all points/tracks/sequences < radius (0...Inf)
-#     -x --x - time expand (compress) factor (ratio of result length to query length (and visa-versa)) [1..Inf]
-#     -o --rotate - rotate the query feature vectors on search
-#
-#     -s --SERVER port
-#     -c --client host:port
-
 package "audioDB"
 version "version 1.0"
 purpose "A feature vector database management system for content-based retrieval."
@@ -43,9 +9,11 @@
 
 option "database" d "database file required by Database commands." string typestr="filename" optional
 option "NEW"    N "make a new (initially empty) database." dependon="database" optional
+option "size"   - "size of database file (in MB)" int dependon="NEW" default="2000" optional
 option "STATUS" S "output database information to stdout." dependon="database" optional
 option "DUMP"   D "output all entries: index key size." dependon="database" optional
 option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional
+
 section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n"
 option "INSERT"      I "add feature vectors to an existing database." dependon="features" optional
 option "UPDATE"      U "replace inserted vectors associated with key with new input vectors." dependon="features" dependon="key" dependon="database" optional hidden