annotate audioDB.h @ 174:2826339b4e92 no-big-mmap

mmap() the various tables separately on init. Continue mmap()ing the whole database as well, as there is still use of it elsewhere in one or two naughty places. mmap()ing individual tables means aligning to page boundaries; make it so.
author mas01cr
date Wed, 14 Nov 2007 16:32:18 +0000
parents cdd441dcc9a8
children c32bf13c3978
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
// String length limit (its users are not visible in this header).
#define MAXSTR 512

// Database PRIMARY commands
#define COM_CREATE      "--NEW"
#define COM_INSERT      "--INSERT"
#define COM_BATCHINSERT "--BATCHINSERT"
#define COM_QUERY       "--QUERY"
#define COM_STATUS      "--STATUS"
#define COM_L2NORM      "--L2NORM"
#define COM_DUMP        "--DUMP"
#define COM_SERVER      "--SERVER"

// parameters
#define COM_CLIENT   "--client"
#define COM_DATABASE "--database"
#define COM_QTYPE    "--qtype"
#define COM_SEQLEN   "--sequencelength"
#define COM_SEQHOP   "--sequencehop"
#define COM_POINTNN  "--pointnn"
// Note: the option text is "--resultlength", not "--tracknn".
#define COM_TRACKNN  "--resultlength"
#define COM_QPOINT   "--qpoint"
#define COM_FEATURES "--features"
#define COM_QUERYKEY "--key"
#define COM_KEYLIST  "--keyList"
#define COM_TIMES    "--times"

// File magic numbers: four ASCII characters packed into one integer, with
// the first character in the least-significant byte.
#define O2_OLD_MAGIC (('O') | ('2' << 8) | ('D' << 16) | ('B' << 24))
#define O2_MAGIC     (('o') | ('2' << 8) | ('d' << 16) | ('b' << 24))
#define O2_FORMAT_VERSION (0U)

#define O2_DEFAULT_POINTNN (10U)
#define O2_DEFAULT_TRACKNN (10U)

#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
//#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size

//#define O2_MAXFILES (1000000)
#define O2_MAXFILES (10000U) // 10,000 files
#define O2_MAXFILESTR (256U)
#define O2_FILETABLESIZE (O2_MAXFILESTR)
#define O2_TRACKTABLESIZE (sizeof(unsigned))
// Expands to sizeof(dbTableHeaderT); that type must be declared before use.
#define O2_HEADERSIZE (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS (1000U)
#define O2_MAXDIM (1000U)
#define O2_MAXNN (10000U)

// Flags
#define O2_FLAG_L2NORM (0x1U)
#define O2_FLAG_MINMAX (0x2U)
#define O2_FLAG_TIMES  (0x20U)

// Query types
#define O2_POINT_QUERY    (0x4U)
#define O2_SEQUENCE_QUERY (0x8U)
#define O2_TRACK_QUERY    (0x10U)

// Error Codes
#define O2_ERR_KEYNOTFOUND (0xFFFFFF00)

// Macros

// True when the string `command` (taken from the scope where the macro is
// expanded) compares equal to a.
#define O2_ACTION(a) (strcmp(command, (a)) == 0)

// Round x up / down to a multiple of 2^w. The mask is computed as an int,
// so w must be small enough that (1 << w) does not overflow int. Arguments
// are fully parenthesised so that expressions such as `a|b` can be passed.
#define ALIGN_UP(x,w) (((x) + ((1 << (w)) - 1)) & ~((1 << (w)) - 1))
#define ALIGN_DOWN(x,w) ((x) & ~((1 << (w)) - 1))

// Round x up / down to a multiple of getpagesize(). The second parameter
// is unused; it is kept so that existing two-argument uses still expand.
// getpagesize() is evaluated on every expansion.
#define ALIGN_PAGE_UP(x,w) (((x) + (getpagesize() - 1)) & ~(getpagesize() - 1))
#define ALIGN_PAGE_DOWN(x,w) ((x) & ~(getpagesize() - 1))

// Yield x, or the empty string when x is null. x is evaluated twice.
#define ENSURE_STRING(x) ((x) ? (x) : "")

mas01cr@0 91 using namespace std;
mas01cr@0 92
// Header record of an audioDB database file. Every field is a uint32_t so
// that the layout is the same on 32-bit and 64-bit builds (see FIXME).
typedef struct dbTableHeader{
  uint32_t magic;
  uint32_t version;
  uint32_t numFiles;
  uint32_t dim;
  uint32_t flags;
  // FIXME: these lengths and offsets should be size_t or off_t, but
  // that causes this header (and hence audioDB files) to be
  // unportable between 32 and 64-bit architectures.  Making them
  // uint32_t isn't the real answer, as it means we won't be able to
  // scale to really large collections easily but it works around the
  // problem.  Expanding to 64 bits will of course need a change in
  // file format version.  -- CSR, 2007-10-05
  uint32_t length;
  uint32_t fileTableOffset;
  uint32_t trackTableOffset;
  uint32_t dataOffset;
  uint32_t l2normTableOffset;
  uint32_t timesTableOffset;
  uint32_t dbSize;
} dbTableHeaderT, *dbTableHeaderPtr;


mas01cr@0 116 class audioDB{
mas01cr@0 117
mas01cr@0 118 private:
mas01cr@0 119 gengetopt_args_info args_info;
mas01cr@0 120 unsigned dim;
mas01cr@0 121 const char *dbName;
mas01cr@0 122 const char *inFile;
mas01cr@0 123 const char *hostport;
mas01cr@0 124 const char *key;
mas01mc@18 125 const char* trackFileName;
mas01mc@18 126 ifstream *trackFile;
mas01cr@0 127 const char *command;
mas01cr@131 128 const char *output;
mas01cr@0 129 const char *timesFileName;
mas01cr@0 130 ifstream *timesFile;
mas01cr@0 131
mas01cr@0 132 int dbfid;
mas01cr@0 133 int infid;
mas01cr@0 134 char* db;
mas01cr@0 135 char* indata;
mas01cr@0 136 struct stat statbuf;
mas01cr@0 137 dbTableHeaderPtr dbH;
mas01cr@0 138
mas01cr@0 139 char *fileTable;
mas01mc@18 140 unsigned* trackTable;
mas01cr@0 141 double* dataBuf;
mas01cr@0 142 double* inBuf;
mas01cr@0 143 double* l2normTable;
mas01cr@0 144 double* qNorm;
mas01cr@0 145 double* sNorm;
mas01cr@0 146 double* timesTable;
mas01cr@0 147
mas01cr@0 148 // Flags and parameters
mas01cr@0 149 unsigned verbosity; // how much do we want to know?
mas01cr@129 150 unsigned size; // given size (for creation)
mas01cr@0 151 unsigned queryType; // point queries default
mas01cr@0 152 unsigned pointNN; // how many point NNs ?
mas01mc@18 153 unsigned trackNN; // how many track NNs ?
mas01cr@0 154 unsigned sequenceLength;
mas01cr@0 155 unsigned sequenceHop;
mas01cr@0 156 unsigned queryPoint;
mas01cr@0 157 unsigned usingQueryPoint;
mas01cr@0 158 unsigned usingTimes;
mas01cr@0 159 unsigned isClient;
mas01cr@0 160 unsigned isServer;
mas01cr@0 161 unsigned port;
mas01cr@0 162 double timesTol;
mas01mc@17 163 double radius;
mas01mc@17 164
mas01cr@0 165 // Timers
mas01cr@0 166 struct timeval tv1;
mas01cr@0 167 struct timeval tv2;
mas01cr@0 168
mas01cr@0 169 // private methods
mas01cr@32 170 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@133 171 void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 172 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 173 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 174 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@0 175
mas01cr@120 176 void initDBHeader(const char *dbName, bool forWrite);
mas01cr@169 177 void initInputFile(const char *inFile);
mas01cr@27 178 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 179 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 180 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 181 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@0 182 unsigned getKeyPos(char* key);
mas01cr@0 183 public:
mas01cr@0 184
mas01cr@76 185 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 186 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
mas01cr@133 187 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse);
mas01cr@97 188 void cleanup();
mas01cr@0 189 ~audioDB();
mas01cr@0 190 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 191 void get_lock(int fd, bool exclusive);
mas01cr@30 192 void release_lock(int fd);
mas01cr@0 193 void create(const char* dbName);
mas01cr@0 194 void drop();
mas01cr@0 195 void insert(const char* dbName, const char* inFile);
mas01cr@0 196 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 197 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 198 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@0 199 void ws_status(const char*dbName, char* hostport);
mas01mc@18 200 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 201 void l2norm(const char* dbName);
mas01cr@0 202 void dump(const char* dbName);
mas01cr@0 203
mas01cr@0 204 // web services
mas01cr@0 205 void startServer();
mas01cr@0 206
mas01cr@0 207 };
mas01mc@17 208
// Constructor member-initializer list for class audioDB (the constructor
// definitions that use it are not in this header). Entries are listed in
// the same order as the members are declared in the class. Every pointer
// member is set to 0, including hostport, inBuf and sNorm, so that none of
// them starts out indeterminate. args_info, statbuf, tv1 and tv2 are not
// listed here.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  dbfid(0), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  fileTable(0), \
  trackTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  qNorm(0), \
  sNorm(0), \
  timesTable(0), \
  verbosity(1), \
  size(O2_DEFAULTDBSIZE), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0)