annotate audioDB.h @ 0:9eab3e0f3068

Initial import
author mas01cr
date Fri, 20 Jul 2007 15:51:39 +0000
parents
children bd6bb994081b 69eb22e09772
rev   line source
mas01cr@0 1 /* audioDB.h
mas01cr@0 2
mas01cr@0 3 audioDB version 1.0
mas01cr@0 4
mas01cr@0 5 An efficient feature-vector database management system (FVDBMS) for
mas01cr@0 6 content-based multimedia search and retrieval.
mas01cr@0 7
mas01cr@0 8 Usage: audioDB [OPTIONS]...
mas01cr@0 9
mas01cr@0 10 --full-help Print help, including hidden options, and exit
mas01cr@0 11 -V, --version Print version and exit
mas01cr@0 12 -H, --help print help on audioDB usage and exit.
mas01cr@0 13
mas01cr@0 14 Database Setup:
mas01cr@0 15 These commands require a database argument.
mas01cr@0 16 -d, --database=filename database name to be used with database commands
mas01cr@0 17 -N, --new make a new database
mas01cr@0 18 -S, --status database information
mas01cr@0 19 -D, --dump list all segments: index key size
mas01cr@0 20
mas01cr@0 21 Database Insertion:
mas01cr@0 22 The following commands process a binary input feature file and optional
mas01cr@0 23 associated key.
mas01cr@0 24 -I, --insert add feature vectors to an existing database
mas01cr@0 25 -f, --features=filename binary series of vectors file
mas01cr@0 26 -t, --times=filename list of time points (ascii) for feature vectors
mas01cr@0 27 -k, --key=identifier unique identifier associated with features
mas01cr@0 28
mas01cr@0 29 Batch Commands:
mas01cr@0 30 These batch commands require a list of feature vector filenames in a text
mas01cr@0 31 file and optional list of keys in a text file.
mas01cr@0 32 -B, --batchinsert add feature vectors named in a featureList file
mas01cr@0 33 (with optional keys in a keyList file) to the
mas01cr@0 34 named database
mas01cr@0 35 -F, --featureList=filename text file containing list of binary feature
mas01cr@0 36 vector files to process
mas01cr@0 37 -T, --timesList=filename text file containing list of ascii time-point
mas01cr@0 38 files for each feature vector file named in
mas01cr@0 39 featureList
mas01cr@0 40 -K, --keyList=filename text file containing list of unique identifiers
mas01cr@0 41 to associate with list of feature files
mas01cr@0 42
mas01cr@0 43 Database Search:
mas01cr@0 44 These commands control the behaviour of retrieval from a named database.
mas01cr@0 45 -Q, --query perform a content-based search on the named
mas01cr@0 46 database using the named feature vector file
mas01cr@0 47 as a query
mas01cr@0 48 -q, --qtype=type the type of search (possible values="point",
mas01cr@0 49 "segment", "sequence" default=`sequence')
mas01cr@0 50 -p, --qpoint=position ordinal position of query vector (or start of
mas01cr@0 51 sequence) in feature vector input file
mas01cr@0 52 (default=`0')
mas01cr@0 53 -n, --pointnn=numpoints number of point nearest neighbours to use [per
mas01cr@0 54 segment in segment and sequence mode]
mas01cr@0 55 (default=`10')
mas01cr@0 56 -r, --resultlength=length maximum length of the result list
mas01cr@0 57 (default=`10')
mas01cr@0 58 -l, --sequencelength=length length of sequences for sequence search
mas01cr@0 59 (default=`16')
mas01cr@0 60 -h, --sequencehop=hop hop size of sequence window for sequence search
mas01cr@0 61 (default=`1')
mas01cr@0 62
mas01cr@0 63 Web Services:
mas01cr@0 64 These commands enable the database process to establish a connection via the
mas01cr@0 65 internet and operate as separate client and server processes.
mas01cr@0 66 -s, --server=port run as standalone web service on named port
mas01cr@0 67 (default=`80011')
mas01cr@0 68 -c, --client=hostname:port run as a client using named host service
mas01cr@0 69
mas01cr@0 70 */
mas01cr@0 71
mas01cr@0 72
mas01cr@0 73 #include <stdio.h>
mas01cr@0 74 #include <stdlib.h>
mas01cr@0 75 #include <sys/types.h>
mas01cr@0 76 #include <sys/stat.h>
mas01cr@0 77 #include <sys/mman.h>
mas01cr@0 78 #include <fcntl.h>
mas01cr@0 79 #include <string.h>
mas01cr@0 80 #include <iostream>
mas01cr@0 81 #include <fstream>
mas01cr@0 82 #include <math.h>
mas01cr@0 83 #include <sys/time.h>
mas01cr@0 84 #include <assert.h>
mas01cr@0 85
mas01cr@0 86 // includes for web services
mas01cr@0 87 #include "soapH.h"
mas01cr@0 88 #include "adb.nsmap"
mas01cr@0 89 #include "cmdline.h"
mas01cr@0 90
mas01cr@0 91 #define MAXSTR 512
mas01cr@0 92
mas01cr@0 93 // Database PRIMARY commands. NOTE(review): these are uppercase while the usage text lists lowercase long options (--new, --insert, ...); presumably internal tokens rather than literal argv strings -- confirm
mas01cr@0 94 #define COM_CREATE "--NEW"
mas01cr@0 95 #define COM_INSERT "--INSERT"
mas01cr@0 96 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 97 #define COM_QUERY "--QUERY"
mas01cr@0 98 #define COM_STATUS "--STATUS"
mas01cr@0 99 #define COM_L2NORM "--L2NORM"
mas01cr@0 100 #define COM_DUMP "--DUMP"
mas01cr@0 101 #define COM_SERVER "--SERVER"
mas01cr@0 102
mas01cr@0 103 // parameters (spelled like the long options in the usage text)
mas01cr@0 104 #define COM_CLIENT "--client"
mas01cr@0 105 #define COM_DATABASE "--database"
mas01cr@0 106 #define COM_QTYPE "--qtype"
mas01cr@0 107 #define COM_SEQLEN "--sequencelength"
mas01cr@0 108 #define COM_SEQHOP "--sequencehop"
mas01cr@0 109 #define COM_POINTNN "--pointnn"
mas01cr@0 110 #define COM_SEGNN "--resultlength" // usage text: maximum length of the result list
mas01cr@0 111 #define COM_QPOINT "--qpoint"
mas01cr@0 112 #define COM_FEATURES "--features"
mas01cr@0 113 #define COM_QUERYKEY "--key"
mas01cr@0 114 #define COM_KEYLIST "--keyList"
mas01cr@0 115 #define COM_TIMES "--times"
mas01cr@0 116
mas01cr@0 117 #define O2_MAGIC 1111765583 // 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order
mas01cr@0 118
mas01cr@0 119 #define O2_DEFAULT_POINTNN (10U) // matches the usage text default for --pointnn
mas01cr@0 120 #define O2_DEFAULT_SEGNN (10U) // matches the usage text default for --resultlength
mas01cr@0 121
mas01cr@0 122 //#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01cr@0 123 #define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 124
mas01cr@0 125 //#define O2_MAXFILES (1000000)
mas01cr@0 126 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 127 #define O2_MAXFILESTR (256U)
mas01cr@0 128 #define O2_FILETABLESIZE (O2_MAXFILESTR) // presumably bytes per file-table entry -- confirm against users of fileTable
mas01cr@0 129 #define O2_SEGTABLESIZE (sizeof(unsigned)) // same element type as the class's `unsigned* segTable`
mas01cr@0 130 #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) // dbTableHeaderT is declared further down; the macro is only expanded at its use sites
mas01cr@0 131 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 132 #define O2_MAXDIM (1000U)
mas01cr@0 133 #define O2_MAXNN (1000U)
mas01cr@0 134
mas01cr@0 135 // Flags (distinct single-bit masks, so they can be OR-ed together)
mas01cr@0 136 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 137 #define O2_FLAG_MINMAX (0x2U)
mas01cr@0 138 #define O2_FLAG_POINT_QUERY (0x4U)
mas01cr@0 139 #define O2_FLAG_SEQUENCE_QUERY (0x8U)
mas01cr@0 140 #define O2_FLAG_SEG_QUERY (0x10U)
mas01cr@0 141 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 142
mas01cr@0 143 // Error Codes
mas01cr@0 144 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // presumably the sentinel returned by getKeyPos (which returns unsigned) -- confirm
mas01cr@0 145
mas01cr@0 146 // Macros. O2_ACTION(a) is true when the identifier `command` in scope at the expansion site equals the string a (strcmp); note that a is not parenthesized in the expansion
mas01cr@0 147 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 148
mas01cr@0 149 using namespace std;
mas01cr@0 150
mas01cr@0 151 // Database table header; O2_HEADERSIZE is sizeof this struct. NOTE(review): this comment originally read "64 byte header", but the five unsigned fields below total 5*sizeof(unsigned) (20 bytes where unsigned is 4 bytes) -- confirm the intended size
mas01cr@0 152 typedef struct dbTableHeader{
mas01cr@0 153 unsigned magic; // presumably O2_MAGIC for a valid database -- confirm
mas01cr@0 154 unsigned numFiles;
mas01cr@0 155 unsigned dim;
mas01cr@0 156 unsigned length;
mas01cr@0 157 unsigned flags; // presumably a combination of the O2_FLAG_* bit masks -- confirm
mas01cr@0 158 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 159
mas01cr@0 160
mas01cr@0 161 class audioDB{
mas01cr@0 162 // Declares the audioDB command object: parsed options, database/input file handles and table pointers, query parameters, and the operations named in the usage text (create, insert, batchinsert, query, status, l2norm, dump) plus web-service client/server entry points.
mas01cr@0 163 private:
mas01cr@0 164 gengetopt_args_info args_info; // option struct type from cmdline.h (gengetopt-generated, presumably)
mas01cr@0 165 unsigned dim;
mas01cr@0 166 const char *dbName;
mas01cr@0 167 const char *inFile;
mas01cr@0 168 const char *hostport;
mas01cr@0 169 const char *key;
mas01cr@0 170 const char* segFileName;
mas01cr@0 171 ifstream *segFile;
mas01cr@0 172 const char *command; // the identifier that the O2_ACTION macro compares against
mas01cr@0 173 const char *timesFileName;
mas01cr@0 174 ifstream *timesFile;
mas01cr@0 175
mas01cr@0 176 int dbfid; // presumably the file descriptor of the database file -- confirm
mas01cr@0 177 int infid; // presumably the file descriptor of the input feature file -- confirm
mas01cr@0 178 char* db;
mas01cr@0 179 char* indata;
mas01cr@0 180 struct stat statbuf;
mas01cr@0 181 dbTableHeaderPtr dbH; // pointer to a dbTableHeaderT
mas01cr@0 182 size_t fileTableOffset;
mas01cr@0 183 size_t segTableOffset;
mas01cr@0 184 size_t dataoffset;
mas01cr@0 185 size_t l2normTableOffset;
mas01cr@0 186 size_t timesTableOffset;
mas01cr@0 187
mas01cr@0 188 char *fileTable;
mas01cr@0 189 unsigned* segTable;
mas01cr@0 190 double* dataBuf;
mas01cr@0 191 double* inBuf;
mas01cr@0 192 double* l2normTable;
mas01cr@0 193 double* qNorm;
mas01cr@0 194 double* sNorm;
mas01cr@0 195 double* timesTable;
mas01cr@0 196
mas01cr@0 197 // Flags and parameters
mas01cr@0 198 unsigned verbosity; // how much do we want to know?
mas01cr@0 199 unsigned queryType; // point queries default. NOTE(review): the usage text documents default=`sequence' for --qtype -- confirm which is right
mas01cr@0 200 unsigned pointNN; // how many point NNs ?
mas01cr@0 201 unsigned segNN; // how many seg NNs ?
mas01cr@0 202 unsigned sequenceLength;
mas01cr@0 203 unsigned sequenceHop;
mas01cr@0 204 unsigned queryPoint;
mas01cr@0 205 unsigned usingQueryPoint;
mas01cr@0 206 unsigned usingTimes;
mas01cr@0 207 unsigned isClient;
mas01cr@0 208 unsigned isServer;
mas01cr@0 209 unsigned port;
mas01cr@0 210 double timesTol;
mas01cr@0 211
mas01cr@0 212 // Timers
mas01cr@0 213 struct timeval tv1;
mas01cr@0 214 struct timeval tv2;
mas01cr@0 215
mas01cr@0 216
mas01cr@0 217
mas01cr@0 218
mas01cr@0 219 // private methods (the query helpers take an optional adb__queryResult* that defaults to null)
mas01cr@0 220 void error(const char* a, const char* b = "");
mas01cr@0 221 void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 222 void sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 223 void segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 224 void segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 225
mas01cr@0 226 void initTables(const char* dbName, const char* inFile);
mas01cr@0 227 void NBestMatchedFilter();
mas01cr@0 228 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 229 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 230 void normalize(double* X, int dim, int n); // NOTE(review): int dim/n here, unsigned in unitNorm/unitNormAndInsertL2
mas01cr@0 231 void normalize(double* X, int dim, int n, double minval, double maxval);
mas01cr@0 232 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@0 233 unsigned getKeyPos(char* key); // NOTE(review): takes non-const char* although the `key` member is const char*; presumably returns O2_ERR_KEYNOTFOUND on a miss -- confirm
mas01cr@0 234 public:
mas01cr@0 235
mas01cr@0 236 audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0);
mas01cr@0 237 ~audioDB();
mas01cr@0 238 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@0 239 void create(const char* dbName);
mas01cr@0 240 void drop();
mas01cr@0 241 void insert(const char* dbName, const char* inFile);
mas01cr@0 242 void batchinsert(const char* dbName, const char* inFile);
mas01cr@0 243 void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 244 void status(const char* dbName);
mas01cr@0 245 void ws_status(const char*dbName, char* hostport); // NOTE(review): hostport is non-const here but const in ws_query
mas01cr@0 246 void ws_query(const char*dbName, const char *segKey, const char* hostport);
mas01cr@0 247 void l2norm(const char* dbName);
mas01cr@0 248 void dump(const char* dbName);
mas01cr@0 249 void deleteDB(const char* dbName, const char* inFile);
mas01cr@0 250
mas01cr@0 251 // web services
mas01cr@0 252 void startServer();
mas01cr@0 253
mas01cr@0 254 };