annotate audioDB.h @ 108:bc141fd1dc41

New on-disk format! * new magic number: "o2db" rather than "O2DB". Check for the old one and give a helpful error message. (We could in principle handle the old databases transparently, but since only MC has actually used them and he has indicated that this is not desperately necessary...) * add fields to header: a file format version (currently 0; we have 2^32-1 revisions to make before we have to worry...) and fields for the offsets of the various tables. This is hopefully a little bit more futureproof: we can move the default locations of the tables around, and even adding new optional tables can be done easily in a fairly transparent manner (if the relevant header field is 0, don't use that feature). * align regions to the appropriate 32-word boundary. This gives us some space to breathe in the header (admittedly only 15 words on 64-bit architectures...)
author mas01cr
date Fri, 05 Oct 2007 14:21:43 +0000
parents 10feb98abebf
children 43722a0ba717
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
// NOTE(review): MAXSTR is not used in this header; presumably a string
// buffer limit used by the implementation file -- confirm there.
#define MAXSTR 512

// Database PRIMARY commands (upper-case spellings)
#define COM_CREATE      "--NEW"
#define COM_INSERT      "--INSERT"
#define COM_BATCHINSERT "--BATCHINSERT"
#define COM_QUERY       "--QUERY"
#define COM_STATUS      "--STATUS"
#define COM_L2NORM      "--L2NORM"
#define COM_DUMP        "--DUMP"
#define COM_SERVER      "--SERVER"

// Command parameters (lower-case spellings)
#define COM_CLIENT      "--client"
#define COM_DATABASE    "--database"
#define COM_QTYPE       "--qtype"
#define COM_SEQLEN      "--sequencelength"
#define COM_SEQHOP      "--sequencehop"
#define COM_POINTNN     "--pointnn"
#define COM_TRACKNN     "--resultlength"
#define COM_QPOINT      "--qpoint"
#define COM_FEATURES    "--features"
#define COM_QUERYKEY    "--key"
#define COM_KEYLIST     "--keyList"
#define COM_TIMES       "--times"
mas01cr@0 46
// File magic numbers: four characters packed into one integer, with the
// first character in the least-significant byte.  O2_OLD_MAGIC ("O2DB")
// identifies the previous on-disk format; O2_MAGIC ("o2db") the current one.
#define O2_OLD_MAGIC      ('O' | ('2' << 8) | ('D' << 16) | ('B' << 24))
#define O2_MAGIC          ('o' | ('2' << 8) | ('d' << 16) | ('b' << 24))
// Version number of the on-disk format.
#define O2_FORMAT_VERSION (0U)

// Default nearest-neighbour counts for points and for tracks.
#define O2_DEFAULT_POINTNN (10U)
#define O2_DEFAULT_TRACKNN (10U)

#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
//#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size

// Table dimensions and upper limits.
//#define O2_MAXFILES (1000000)
#define O2_MAXFILES       (10000U) // 10,000 files
#define O2_MAXFILESTR     (256U)
#define O2_FILETABLESIZE  (O2_MAXFILESTR)
#define O2_TRACKTABLESIZE (sizeof(unsigned))
#define O2_HEADERSIZE     (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS (1000U)
#define O2_MAXDIM         (1000U)
#define O2_MAXNN          (10000U)
mas01cr@0 66
// Flags (single-bit values).
// NOTE(review): the flag bits and the query-type bits below do not overlap
// (0x1, 0x2, 0x20 versus 0x4, 0x8, 0x10); whether they are ever combined in
// one word is not visible in this header.
#define O2_FLAG_L2NORM (1U << 0)
#define O2_FLAG_MINMAX (1U << 1)
#define O2_FLAG_TIMES  (1U << 5)

// Query types (single-bit values)
#define O2_POINT_QUERY    (1U << 2)
#define O2_SEQUENCE_QUERY (1U << 3)
#define O2_TRACK_QUERY    (1U << 4)

// Error Codes
#define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 79
// Macros

// True when the string `command` (which must be in scope at the point of
// use) compares equal to `a`.
#define O2_ACTION(a) (strcmp(command, (a)) == 0)

// ALIGN_UP(x, w):   round x up to the next multiple of 2^w.
// ALIGN_DOWN(x, w): round x down to a multiple of 2^w.
// Every argument is parenthesized so that expressions such as `a|b` or
// `c ? d : e` can be passed safely, and the addition in ALIGN_UP is grouped
// explicitly rather than relying on `+` binding tighter than `&`.
// The mask is built from the int constant 1, so w must be non-negative and
// small enough that (1 << w) is representable in an int.  x and w may be
// evaluated more than once; do not pass expressions with side effects.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
mas01cr@108 85
mas01cr@0 86 using namespace std;
mas01cr@0 87
// Database header record.
// Its size is sizeof(dbTableHeaderT) (see O2_HEADERSIZE) and depends on the
// platform's widths for unsigned and size_t, so it is not a fixed 64 bytes.
struct dbTableHeader {
  unsigned magic;            // compared against O2_MAGIC / O2_OLD_MAGIC
  unsigned version;          // file format version (O2_FORMAT_VERSION)
  unsigned numFiles;
  unsigned dim;
  unsigned flags;            // presumably O2_FLAG_* bits -- confirm in audioDB.cpp
  size_t length;             // NOTE(review): units/meaning not visible in this header
  // Offsets of the individual tables.
  size_t fileTableOffset;
  size_t trackTableOffset;
  size_t dataOffset;
  size_t l2normTableOffset;
  size_t timesTableOffset;
};

typedef struct dbTableHeader dbTableHeaderT;
typedef struct dbTableHeader *dbTableHeaderPtr;
mas01cr@0 102
mas01cr@0 103
mas01cr@0 104 class audioDB{
mas01cr@0 105
mas01cr@0 106 private:
mas01cr@0 107 gengetopt_args_info args_info;
mas01cr@0 108 unsigned dim;
mas01cr@0 109 const char *dbName;
mas01cr@0 110 const char *inFile;
mas01cr@0 111 const char *hostport;
mas01cr@0 112 const char *key;
mas01mc@18 113 const char* trackFileName;
mas01mc@18 114 ifstream *trackFile;
mas01cr@0 115 const char *command;
mas01cr@0 116 const char *timesFileName;
mas01cr@0 117 ifstream *timesFile;
mas01cr@0 118
mas01cr@0 119 int dbfid;
mas01cr@0 120 int infid;
mas01cr@0 121 char* db;
mas01cr@0 122 char* indata;
mas01cr@0 123 struct stat statbuf;
mas01cr@0 124 dbTableHeaderPtr dbH;
mas01cr@0 125
mas01cr@0 126 char *fileTable;
mas01mc@18 127 unsigned* trackTable;
mas01cr@0 128 double* dataBuf;
mas01cr@0 129 double* inBuf;
mas01cr@0 130 double* l2normTable;
mas01cr@0 131 double* qNorm;
mas01cr@0 132 double* sNorm;
mas01cr@0 133 double* timesTable;
mas01cr@0 134
mas01cr@0 135 // Flags and parameters
mas01cr@0 136 unsigned verbosity; // how much do we want to know?
mas01cr@0 137 unsigned queryType; // point queries default
mas01cr@0 138 unsigned pointNN; // how many point NNs ?
mas01mc@18 139 unsigned trackNN; // how many track NNs ?
mas01cr@0 140 unsigned sequenceLength;
mas01cr@0 141 unsigned sequenceHop;
mas01cr@0 142 unsigned queryPoint;
mas01cr@0 143 unsigned usingQueryPoint;
mas01cr@0 144 unsigned usingTimes;
mas01cr@0 145 unsigned isClient;
mas01cr@0 146 unsigned isServer;
mas01cr@0 147 unsigned port;
mas01cr@0 148 double timesTol;
mas01mc@17 149 double radius;
mas01mc@17 150
mas01cr@0 151 // Timers
mas01cr@0 152 struct timeval tv1;
mas01cr@0 153 struct timeval tv2;
mas01cr@0 154
mas01cr@0 155 // private methods
mas01cr@32 156 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@0 157 void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@18 158 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@20 159 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01mc@20 160 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@0 161
mas01cr@27 162 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 163 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 164 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 165 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@0 166 unsigned getKeyPos(char* key);
mas01cr@0 167 public:
mas01cr@0 168
mas01cr@76 169 audioDB(const unsigned argc, char* const argv[]);
mas01cr@76 170 audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult);
mas01cr@76 171 audioDB(const unsigned argc, char* const argv[], adb__statusResult *adbStatusResult);
mas01cr@97 172 void cleanup();
mas01cr@0 173 ~audioDB();
mas01cr@0 174 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 175 void get_lock(int fd, bool exclusive);
mas01cr@30 176 void release_lock(int fd);
mas01cr@0 177 void create(const char* dbName);
mas01cr@0 178 void drop();
mas01cr@0 179 void insert(const char* dbName, const char* inFile);
mas01cr@0 180 void batchinsert(const char* dbName, const char* inFile);
mas01cr@0 181 void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
mas01cr@76 182 void status(const char* dbName, adb__statusResult *adbStatusResult=0);
mas01cr@0 183 void ws_status(const char*dbName, char* hostport);
mas01mc@18 184 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 185 void l2norm(const char* dbName);
mas01cr@0 186 void dump(const char* dbName);
mas01cr@0 187
mas01cr@0 188 // web services
mas01cr@0 189 void startServer();
mas01cr@0 190
mas01cr@0 191 };
mas01mc@17 192
// Member-initializer list shared by the audioDB constructors.
// Entries follow the declaration order of the data members in class audioDB
// (members are initialized in declaration order regardless of how the list
// is written, and compilers warn when the two disagree).
// Every pointer member starts out null, including hostport, inBuf and sNorm,
// so that no pointer is left indeterminate after construction.  The
// non-scalar members args_info, statbuf, tv1 and tv2 are deliberately not
// listed here.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  timesFileName(0), \
  timesFile(0), \
  dbfid(0), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  fileTable(0), \
  trackTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  qNorm(0), \
  sNorm(0), \
  timesTable(0), \
  verbosity(1), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0)