annotate audioDB.h @ 62:9379253c3f78

Fix tests/0004 by initializing the distance vector for point queries with -DBL_MAX rather than 0.0.
author mas01cr
date Thu, 20 Sep 2007 14:29:30 +0000
parents cd851d1fbc62
children 128f065fd250
rev   line source
mas01cr@0 1 /* audioDB.h
mas01cr@0 2
mas01cr@0 3 audioDB version 1.0
mas01cr@0 4
mas01cr@0 5 An efficient feature-vector database management system (FVDBMS) for
mas01cr@0 6 content-based multimedia search and retrieval.
mas01cr@0 7
mas01cr@0 8 Usage: audioDB [OPTIONS]...
mas01cr@0 9
mas01cr@0 10 --full-help Print help, including hidden options, and exit
mas01cr@0 11 -V, --version Print version and exit
mas01cr@0 12 -H, --help print help on audioDB usage and exit.
mas01cr@0 13
mas01cr@0 14 Database Setup:
mas01cr@0 15 These commands require a database argument.
mas01cr@0 16 -d, --database=filename database name to be used with database commands
mas01cr@0 17 -N, --new make a new database
mas01cr@0 18 -S, --status database information
mas01mc@18 19 -D, --dump list all tracks: index key size
mas01cr@0 20
mas01cr@0 21 Database Insertion:
mas01cr@0 22 The following commands process a binary input feature file and optional
mas01cr@0 23 associated key.
mas01cr@0 24 -I, --insert add feature vectors to an existing database
mas01cr@0 25 -f, --features=filename binary series of vectors file
mas01cr@0 26 -t, --times=filename list of time points (ascii) for feature vectors
mas01cr@0 27 -k, --key=identifier unique identifier associated with features
mas01cr@0 28
mas01cr@0 29 Batch Commands:
mas01cr@0 30 These batch commands require a list of feature vector filenames in a text
mas01cr@0 31 file and optional list of keys in a text file.
mas01cr@0 32 -B, --batchinsert add feature vectors named in a featureList file
mas01cr@0 33 (with optional keys in a keyList file) to the
mas01cr@0 34 named database
mas01cr@0 35 -F, --featureList=filename text file containing list of binary feature
mas01cr@0 36 vector files to process
mas01cr@0 37 -T, --timesList=filename text file containing list of ascii time-point
mas01cr@0 38 files for each feature vector file named in
mas01cr@0 39 featureList
mas01cr@0 40 -K, --keyList=filename text file containing list of unique identifiers
mas01cr@0 41 to associate with list of feature files
mas01cr@0 42
mas01cr@0 43 Database Search:
mas01cr@0 44 These commands control the behaviour of retrieval from a named database.
mas01cr@0 45 -Q, --query perform a content-based search on the named
mas01cr@0 46 database using the named feature vector file
mas01cr@0 47 as a query
mas01cr@0 48 -q, --qtype=type the type of search (possible values="point",
mas01mc@18 49 "track", "sequence" default=`sequence')
mas01cr@0 50 -p, --qpoint=position ordinal position of query vector (or start of
mas01cr@0 51 sequence) in feature vector input file
mas01cr@0 52 (default=`0')
mas01cr@0 53 -n, --pointnn=numpoints number of point nearest neighbours to use [per
mas01mc@18 54 track in track and sequence mode]
mas01cr@0 55 (default=`10')
mas01cr@0 56 -r, --resultlength=length maximum length of the result list
mas01cr@0 57 (default=`10')
mas01cr@0 58 -l, --sequencelength=length length of sequences for sequence search
mas01cr@0 59 (default=`16')
mas01cr@0 60 -h, --sequencehop=hop hop size of sequence window for sequence search
mas01cr@0 61 (default=`1')
mas01cr@0 62
mas01cr@0 63 Web Services:
mas01cr@0 64 These commands enable the database process to establish a connection via the
mas01cr@0 65 internet and operate as separate client and server processes.
mas01cr@0 66 -s, --server=port run as standalone web service on named port
mas01cr@0 67 (default=`80011')
mas01cr@0 68 -c, --client=hostname:port run as a client using named host service
mas01cr@0 69
mas01cr@0 70 */
mas01cr@0 71
mas01cr@0 72
mas01cr@0 73 #include <stdio.h>
mas01cr@0 74 #include <stdlib.h>
mas01cr@0 75 #include <sys/types.h>
mas01cr@0 76 #include <sys/stat.h>
mas01cr@0 77 #include <sys/mman.h>
mas01cr@0 78 #include <fcntl.h>
mas01cr@0 79 #include <string.h>
mas01cr@0 80 #include <iostream>
mas01cr@0 81 #include <fstream>
mas01cr@0 82 #include <math.h>
mas01cr@0 83 #include <sys/time.h>
mas01cr@0 84 #include <assert.h>
mas01cr@62 85 #include <float.h>
mas01cr@0 86
mas01cr@0 87 // includes for web services
mas01cr@0 88 #include "soapH.h"
mas01cr@0 89 #include "adb.nsmap"
mas01cr@0 90 #include "cmdline.h"
mas01cr@0 91
mas01cr@0 92 #define MAXSTR 512
mas01cr@0 93
mas01cr@0 94 // Database PRIMARY commands
mas01cr@0 95 #define COM_CREATE "--NEW"
mas01cr@0 96 #define COM_INSERT "--INSERT"
mas01cr@0 97 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 98 #define COM_QUERY "--QUERY"
mas01cr@0 99 #define COM_STATUS "--STATUS"
mas01cr@0 100 #define COM_L2NORM "--L2NORM"
mas01cr@0 101 #define COM_DUMP "--DUMP"
mas01cr@0 102 #define COM_SERVER "--SERVER"
mas01cr@0 103
mas01cr@0 104 // parameters
mas01cr@0 105 #define COM_CLIENT "--client"
mas01cr@0 106 #define COM_DATABASE "--database"
mas01cr@0 107 #define COM_QTYPE "--qtype"
mas01cr@0 108 #define COM_SEQLEN "--sequencelength"
mas01cr@0 109 #define COM_SEQHOP "--sequencehop"
mas01cr@0 110 #define COM_POINTNN "--pointnn"
mas01mc@18 111 #define COM_TRACKNN "--resultlength"
mas01cr@0 112 #define COM_QPOINT "--qpoint"
mas01cr@0 113 #define COM_FEATURES "--features"
mas01cr@0 114 #define COM_QUERYKEY "--key"
mas01cr@0 115 #define COM_KEYLIST "--keyList"
mas01cr@0 116 #define COM_TIMES "--times"
mas01cr@0 117
mas01cr@0 118 #define O2_MAGIC 1111765583 // == 0x4244324F, i.e. 'B'<<24|'D'<<16|'2'<<8|'O'; reads O2DB in little endian order
mas01cr@0 119
mas01cr@0 120 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 121 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 122
mas01mc@7 123 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01mc@7 124 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 125
mas01cr@0 126 //#define O2_MAXFILES (1000000)
mas01cr@0 127 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 128 #define O2_MAXFILESTR (256U)
mas01cr@0 129 #define O2_FILETABLESIZE (O2_MAXFILESTR)
mas01mc@18 130 #define O2_TRACKTABLESIZE (sizeof(unsigned))
mas01cr@0 131 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 132 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 133 #define O2_MAXDIM (1000U)
mas01mc@17 134 #define O2_MAXNN (10000U)
mas01cr@0 135
mas01cr@0 136 // Flags: each constant below is a distinct single-bit mask
mas01cr@0 137 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 138 #define O2_FLAG_MINMAX (0x2U)
mas01cr@0 139 #define O2_FLAG_POINT_QUERY (0x4U)
mas01cr@0 140 #define O2_FLAG_SEQUENCE_QUERY (0x8U)
mas01mc@18 141 #define O2_FLAG_TRACK_QUERY (0x10U)
mas01cr@0 142 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 143
mas01cr@0 144 // Error Codes
mas01cr@0 145 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 146
mas01cr@0 147 // Macros: O2_ACTION(a) is true when the string `command` (which must be in scope at the point of use) equals a
mas01cr@0 148 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 149
mas01cr@0 150 using namespace std;
mas01cr@0 151
mas01cr@0 152 // Database table header: five unsigned fields; O2_HEADERSIZE is sizeof(dbTableHeaderT). NOTE(review): the earlier "64 byte header" wording does not match 5*sizeof(unsigned) - confirm the intended on-disk size.
mas01cr@0 153 typedef struct dbTableHeader{
mas01cr@0 154 unsigned magic; // presumably holds O2_MAGIC for a valid database - confirm against the implementation
mas01cr@0 155 unsigned numFiles;
mas01cr@0 156 unsigned dim;
mas01cr@0 157 unsigned length;
mas01cr@0 158 unsigned flags; // presumably a combination of the O2_FLAG_* bits - confirm
mas01cr@0 159 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 160
mas01cr@0 161
mas01cr@0 162 class audioDB{
mas01cr@0 163 // State: parsed options (args_info), file names and integer file ids, table offsets and pointers, query parameters, timers. Operations: create/insert/batchinsert/query/status/l2norm/dump plus web-service entry points.
mas01cr@0 164 private:
mas01cr@0 165 gengetopt_args_info args_info;
mas01cr@0 166 unsigned dim;
mas01cr@0 167 const char *dbName;
mas01cr@0 168 const char *inFile;
mas01cr@0 169 const char *hostport;
mas01cr@0 170 const char *key;
mas01mc@18 171 const char* trackFileName;
mas01mc@18 172 ifstream *trackFile;
mas01cr@0 173 const char *command; // the name O2_ACTION(a) compares against
mas01cr@0 174 const char *timesFileName;
mas01cr@0 175 ifstream *timesFile;
mas01cr@0 176
mas01cr@0 177 int dbfid;
mas01cr@0 178 int infid;
mas01cr@0 179 char* db;
mas01cr@0 180 char* indata;
mas01cr@0 181 struct stat statbuf;
mas01cr@0 182 dbTableHeaderPtr dbH; // pointer to a dbTableHeaderT
mas01cr@0 183 size_t fileTableOffset;
mas01mc@18 184 size_t trackTableOffset;
mas01cr@0 185 size_t dataoffset;
mas01cr@0 186 size_t l2normTableOffset;
mas01cr@0 187 size_t timesTableOffset;
mas01cr@0 188
mas01cr@0 189 char *fileTable;
mas01mc@18 190 unsigned* trackTable;
mas01cr@0 191 double* dataBuf;
mas01cr@0 192 double* inBuf;
mas01cr@0 193 double* l2normTable;
mas01cr@0 194 double* qNorm;
mas01cr@0 195 double* sNorm;
mas01cr@0 196 double* timesTable;
mas01cr@0 197
mas01cr@0 198 // Flags and parameters
mas01cr@0 199 unsigned verbosity; // how much do we want to know?
mas01cr@0 200 unsigned queryType; // point queries default
mas01cr@0 201 unsigned pointNN; // how many point NNs ?
mas01mc@18 202 unsigned trackNN; // how many track NNs ?
mas01cr@0 203 unsigned sequenceLength;
mas01cr@0 204 unsigned sequenceHop;
mas01cr@0 205 unsigned queryPoint;
mas01cr@0 206 unsigned usingQueryPoint;
mas01cr@0 207 unsigned usingTimes;
mas01cr@0 208 unsigned isClient;
mas01cr@0 209 unsigned isServer;
mas01cr@0 210 unsigned port;
mas01cr@0 211 double timesTol;
mas01mc@17 212 double radius;
mas01mc@17 213
mas01cr@0 214 // Timers
mas01cr@0 215 struct timeval tv1;
mas01cr@0 216 struct timeval tv2;
mas01cr@0 217
mas01cr@0 218 // private methods (each query variant takes an optional adbQueryResult pointer that defaults to 0)
mas01cr@32 219 void error(const char* a, const char* b = "", const char *sysFunc = 0); // b defaults to an empty string, sysFunc to 0
mas01cr@0 220 void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@18 221 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@20 222 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@20 223 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 224
mas01cr@27 225 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 226 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 227 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 228 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@0 229 unsigned getKeyPos(char* key); // NOTE(review): O2_ERR_KEYNOTFOUND looks like the not-found return value - confirm in the implementation
mas01cr@0 230 public:
mas01cr@0 231
mas01cr@0 232 audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0); // adbQueryResult is optional (defaults to 0)
mas01cr@0 233 ~audioDB();
mas01cr@0 234 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 235 void get_lock(int fd, bool exclusive);
mas01cr@30 236 void release_lock(int fd);
mas01cr@0 237 void create(const char* dbName);
mas01cr@0 238 void drop();
mas01cr@0 239 void insert(const char* dbName, const char* inFile);
mas01cr@0 240 void batchinsert(const char* dbName, const char* inFile);
mas01cr@0 241 void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); // adbQueryResult is optional (defaults to 0)
mas01cr@0 242 void status(const char* dbName);
mas01cr@0 243 void ws_status(const char*dbName, char* hostport);
mas01mc@18 244 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 245 void l2norm(const char* dbName);
mas01cr@0 246 void dump(const char* dbName);
mas01cr@0 247
mas01cr@0 248 // web services
mas01cr@0 249 void startServer();
mas01cr@0 250
mas01cr@0 251 };
mas01mc@17 252