annotate audioDB.h @ 196:8c81cacf5aab

Merge -r228:254 from no-big-mmap branch. Although the last log message from that branch only mentioned working create and status (-N and -S), it turned out that I seemed to have done everything right for dump and search on huge DBs to work too. Additionally: * bump the DB format version; * CHECKED_MMAP() for the powerTable; * move the powerTable above the timesTable, so that all the code everywhere which computes the length of the data buffer assuming that the timesTable is the next thing on the disk still works.
author mas01cr
date Fri, 23 Nov 2007 11:08:15 +0000
parents f9d16137e704
children 72a037f2b1e4
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
mas01cr@0 21 #define MAXSTR 512
mas01cr@0 22
mas01cr@0 23 // Database PRIMARY commands: option strings naming the top-level action (presumably compared against `command` with O2_ACTION -- confirm in the .cpp)
mas01cr@0 24 #define COM_CREATE "--NEW"
mas01cr@0 25 #define COM_INSERT "--INSERT"
mas01cr@0 26 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 27 #define COM_QUERY "--QUERY"
mas01cr@0 28 #define COM_STATUS "--STATUS"
mas01cr@0 29 #define COM_L2NORM "--L2NORM"
mas01cr@193 30 #define COM_POWER "--POWER"
mas01cr@0 31 #define COM_DUMP "--DUMP"
mas01cr@0 32 #define COM_SERVER "--SERVER"
mas01cr@0 33
mas01cr@0 34 // parameters: option strings that accompany a primary command
mas01cr@0 35 #define COM_CLIENT "--client"
mas01cr@0 36 #define COM_DATABASE "--database"
mas01cr@0 37 #define COM_QTYPE "--qtype"
mas01cr@0 38 #define COM_SEQLEN "--sequencelength"
mas01cr@0 39 #define COM_SEQHOP "--sequencehop"
mas01cr@0 40 #define COM_POINTNN "--pointnn"
mas01mc@18 41 #define COM_TRACKNN "--resultlength" // note: the option text is "resultlength", not "tracknn"
mas01cr@0 42 #define COM_QPOINT "--qpoint"
mas01cr@0 43 #define COM_FEATURES "--features"
mas01cr@0 44 #define COM_QUERYKEY "--key"
mas01cr@0 45 #define COM_KEYLIST "--keyList"
mas01cr@0 46 #define COM_TIMES "--times"
mas01cr@193 47 #define COM_QUERYPOWER "--power" // lower-case; distinct from the primary command COM_POWER ("--POWER")
mas01cr@193 48 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@193 49 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01cr@0 50
mas01cr@108 51 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24) // characters "O2DB" packed one per byte, 'O' in the lowest byte
mas01cr@108 52 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) // characters "o2db" packed one per byte, 'o' in the lowest byte
mas01cr@196 53 #define O2_FORMAT_VERSION (2U) // presumably the value expected in dbTableHeader.version -- confirm
mas01cr@0 54
mas01cr@0 55 #define O2_DEFAULT_POINTNN (10U) // initial value of audioDB::pointNN (see O2_AUDIODB_INITIALIZERS)
mas01mc@18 56 #define O2_DEFAULT_TRACKNN (10U) // initial value of audioDB::trackNN (see O2_AUDIODB_INITIALIZERS)
mas01cr@0 57
mas01mc@7 58 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size; initial value of audioDB::size
mas01mc@7 59 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 60
mas01cr@0 61 //#define O2_MAXFILES (1000000)
mas01cr@0 62 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 63 #define O2_MAXFILESTR (256U)
mas01cr@0 64 #define O2_FILETABLESIZE (O2_MAXFILESTR) // same value as O2_MAXFILESTR; presumably bytes per file-table entry -- confirm
mas01mc@18 65 #define O2_TRACKTABLESIZE (sizeof(unsigned)) // size of one unsigned, the element type of audioDB::trackTable
mas01cr@0 66 #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) // size in bytes of the dbTableHeaderT struct declared below
mas01cr@0 67 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 68 #define O2_MAXDIM (1000U)
mas01mc@17 69 #define O2_MAXNN (10000U)
mas01cr@0 70
mas01cr@0 71 // Flags (single-bit values; presumably OR-ed into dbTableHeader.flags -- confirm)
mas01cr@0 72 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 73 #define O2_FLAG_MINMAX (0x2U)
mas01cr@193 74 #define O2_FLAG_POWER (0x4U)
mas01cr@0 75 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 76
mas01cr@105 77 // Query types (O2_POINT_QUERY is the initial audioDB::queryType)
mas01cr@105 78 #define O2_POINT_QUERY (0x4U)
mas01cr@105 79 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 80 #define O2_TRACK_QUERY (0x10U)
mas01cr@105 81
mas01cr@0 82 // Error Codes
mas01cr@0 83 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // NOTE(review): presumably the failure value of getKeyPos() -- confirm
mas01cr@0 84
mas01cr@0 85 // Macros (every macro argument is parenthesised so compound expressions expand safely)
mas01cr@0 86 #define O2_ACTION(a) (strcmp(command,(a))==0) // true when the `command` string in scope at the point of use equals a
mas01cr@0 87
mas01cr@108 88 #define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1)) // round x up to a multiple of 2^w; 1<<(w) is an int shift, so w must be < 31
mas01cr@108 89 #define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1)) // round x down to a multiple of 2^w
mas01cr@108 90
mas01cr@196 91 #define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1)) // round x up to a multiple of getpagesize(); relies on the page size being a power of two
mas01cr@196 92 #define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1)) // round x down to a multiple of getpagesize()
mas01cr@196 93
mas01cr@166 94 #define ENSURE_STRING(x) ((x) ? (x) : "") // yields "" when x is null; note that x is evaluated twice
mas01cr@166 95
mas01cr@0 96 using namespace std;
mas01cr@0 97
mas01cr@0 98 typedef struct dbTableHeader{ // header record of a database; O2_HEADERSIZE is sizeof this struct. Field order and types fix its layout.
mas01cr@114 99 uint32_t magic; // presumably O2_MAGIC (O2_OLD_MAGIC for older files) -- confirm
mas01cr@114 100 uint32_t version; // presumably compared with O2_FORMAT_VERSION -- confirm
mas01cr@114 101 uint32_t numFiles;
mas01cr@114 102 uint32_t dim;
mas01cr@114 103 uint32_t flags; // presumably a bitwise OR of the O2_FLAG_* values -- confirm
mas01cr@196 104 off_t length; // off_t from here on: its width depends on the platform/build, so the struct layout does too
mas01cr@196 105 off_t fileTableOffset; // NOTE(review): the *Offset fields look like positions of each table within the database file -- confirm
mas01cr@196 106 off_t trackTableOffset;
mas01cr@196 107 off_t dataOffset;
mas01cr@196 108 off_t l2normTableOffset;
mas01cr@196 109 off_t timesTableOffset;
mas01cr@196 110 off_t powerTableOffset;
mas01cr@196 111 off_t dbSize;
mas01cr@0 112 } dbTableHeaderT, *dbTableHeaderPtr; // dbTableHeaderPtr is a pointer-to-header typedef
mas01cr@0 113
mas01cr@0 114
mas01cr@0 115 class audioDB{ // holds the parsed options, database table pointers and query parameters; constructed from argc/argv
mas01cr@0 116
mas01cr@0 117 private:
mas01cr@0 118 gengetopt_args_info args_info; // option structure; the type is presumably declared in cmdline.h
mas01cr@0 119 unsigned dim;
mas01cr@0 120 const char *dbName;
mas01cr@0 121 const char *inFile;
mas01cr@0 122 const char *hostport;
mas01cr@0 123 const char *key;
mas01mc@18 124 const char* trackFileName;
mas01mc@18 125 ifstream *trackFile;
mas01cr@0 126 const char *command; // the string that O2_ACTION compares against
mas01cr@131 127 const char *output;
mas01cr@0 128 const char *timesFileName;
mas01cr@0 129 ifstream *timesFile;
mas01cr@193 130 const char *powerFileName;
mas01cr@193 131 ifstream *powerFile;
mas01cr@193 132 int powerfd; // NOTE(review): power data has both an ifstream and an int descriptor; insertPowerData() takes the int
mas01cr@0 133
mas01cr@0 134 int dbfid; // presumably the file descriptor of the database file -- confirm
mas01cr@196 135 bool forWrite;
mas01cr@0 136 int infid; // presumably the file descriptor of the input feature file -- confirm
mas01cr@0 137 char* db;
mas01cr@0 138 char* indata;
mas01cr@0 139 struct stat statbuf;
mas01cr@0 140 dbTableHeaderPtr dbH; // pointer to the database header struct
mas01cr@0 141
mas01cr@0 142 char *fileTable;
mas01mc@18 143 unsigned* trackTable; // element size matches O2_TRACKTABLESIZE (sizeof(unsigned))
mas01cr@0 144 double* dataBuf;
mas01cr@0 145 double* inBuf;
mas01cr@0 146 double* l2normTable;
mas01cr@0 147 double* qNorm;
mas01cr@0 148 double* sNorm;
mas01cr@196 149 double* timesTable;
mas01cr@193 150 double* powerTable;
mas01cr@0 151
mas01cr@196 152 size_t fileTableLength;
mas01cr@196 153 size_t trackTableLength;
mas01cr@196 154 off_t dataBufLength; // off_t rather than size_t, unlike the other table lengths
mas01cr@196 155 size_t timesTableLength;
mas01cr@196 156 size_t powerTableLength;
mas01cr@196 157 size_t l2normTableLength;
mas01cr@196 158
mas01cr@0 159 // Flags and parameters (initial values are set by O2_AUDIODB_INITIALIZERS)
mas01cr@0 160 unsigned verbosity; // how much do we want to know? (starts at 1)
mas01cr@196 161 off_t size; // given size (for creation); starts at O2_DEFAULTDBSIZE
mas01cr@0 162 unsigned queryType; // point queries default (O2_POINT_QUERY)
mas01cr@0 163 unsigned pointNN; // how many point NNs ? (starts at O2_DEFAULT_POINTNN)
mas01mc@18 164 unsigned trackNN; // how many track NNs ? (starts at O2_DEFAULT_TRACKNN)
mas01cr@0 165 unsigned sequenceLength; // starts at 16
mas01cr@0 166 unsigned sequenceHop; // starts at 1
mas01cr@0 167 unsigned queryPoint;
mas01cr@0 168 unsigned usingQueryPoint;
mas01cr@0 169 unsigned usingTimes;
mas01cr@193 170 unsigned usingPower;
mas01cr@0 171 unsigned isClient;
mas01cr@0 172 unsigned isServer;
mas01cr@0 173 unsigned port;
mas01cr@0 174 double timesTol; // starts at 0.1
mas01mc@17 175 double radius;
mas01cr@193 176
mas01cr@193 177 bool use_absolute_threshold; // starts false
mas01cr@193 178 double absolute_threshold;
mas01cr@193 179 bool use_relative_threshold; // starts false
mas01cr@193 180 double relative_threshold;
mas01cr@193 181
mas01mc@17 182
mas01cr@0 183 // Timers
mas01cr@0 184 struct timeval tv1;
mas01cr@0 185 struct timeval tv2;
mas01cr@0 186
mas01cr@0 187 // private methods
mas01cr@32 188 void error(const char* a, const char* b = "", const char *sysFunc = 0); // b and sysFunc are optional
mas01cr@133 189 void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); // adbQueryResponse defaults to null in all query methods
mas01cr@133 190 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@193 191 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@193 192 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@193 193 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@193 194
mas01cr@133 195 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 196 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@0 197
mas01cr@196 198 void initDBHeader(const char *dbName);
mas01cr@169 199 void initInputFile(const char *inFile);
mas01cr@196 200 void initTables(const char* dbName, const char* inFile);
mas01cr@0 201 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 202 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 203 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@193 204 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 205 unsigned getKeyPos(char* key);
mas01cr@0 206 public:
mas01cr@0 207
mas01cr@76 208 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 209 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse); // overload carrying a query response object
mas01cr@133 210 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse); // overload carrying a status response object
mas01cr@97 211 void cleanup();
mas01cr@0 212 ~audioDB();
mas01cr@0 213 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 214 void get_lock(int fd, bool exclusive);
mas01cr@30 215 void release_lock(int fd);
mas01cr@0 216 void create(const char* dbName);
mas01cr@0 217 void drop();
mas01cr@196 218 bool enough_data_space_free(off_t size);
mas01cr@196 219 void insert_data_vectors(off_t offset, void *buffer, size_t size);
mas01cr@0 220 void insert(const char* dbName, const char* inFile);
mas01cr@0 221 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 222 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 223 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@0 224 void ws_status(const char*dbName, char* hostport);
mas01mc@18 225 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 226 void l2norm(const char* dbName);
mas01cr@193 227 void power_flag(const char *dbName);
mas01cr@193 228 bool powers_acceptable(double p1, double p2);
mas01cr@0 229 void dump(const char* dbName);
mas01cr@0 230
mas01cr@0 231 // web services
mas01cr@0 232 void startServer();
mas01cr@0 233
mas01cr@0 234 };
mas01mc@17 235
mas01cr@105 236 #define O2_AUDIODB_INITIALIZERS /* member-initializer list for the audioDB constructors; entries follow the member declaration order in class audioDB */ \
mas01cr@105 237 dim(0), \
mas01cr@105 238 dbName(0), \
mas01cr@105 239 inFile(0), hostport(0), /* every pointer member starts out null */ \
mas01cr@105 240 key(0), \
mas01cr@105 241 trackFileName(0), \
mas01cr@105 242 trackFile(0), \
mas01cr@105 243 command(0), \
mas01cr@131 244 output(0), \
mas01cr@105 245 timesFileName(0), \
mas01cr@105 246 timesFile(0), \
mas01cr@193 247 powerFileName(0), \
mas01cr@193 248 powerFile(0), \
mas01cr@193 249 powerfd(0), \
mas01cr@105 250 dbfid(0), \
mas01cr@196 251 forWrite(false), \
mas01cr@105 252 infid(0), \
mas01cr@105 253 db(0), \
mas01cr@105 254 indata(0), \
mas01cr@105 255 dbH(0), \
mas01cr@105 256 fileTable(0), \
mas01cr@105 257 trackTable(0), \
mas01cr@105 258 dataBuf(0), inBuf(0), \
mas01cr@105 259 l2normTable(0), \
mas01cr@105 260 qNorm(0), sNorm(0), \
mas01cr@105 261 timesTable(0), powerTable(0), \
mas01cr@196 262 fileTableLength(0), \
mas01cr@196 263 trackTableLength(0), \
mas01cr@196 264 dataBufLength(0), \
mas01cr@196 265 timesTableLength(0), \
mas01cr@196 266 powerTableLength(0), \
mas01cr@196 267 l2normTableLength(0), \
mas01cr@105 268 verbosity(1), \
mas01cr@129 269 size(O2_DEFAULTDBSIZE), \
mas01cr@105 270 queryType(O2_POINT_QUERY), \
mas01cr@105 271 pointNN(O2_DEFAULT_POINTNN), \
mas01cr@105 272 trackNN(O2_DEFAULT_TRACKNN), \
mas01cr@105 273 sequenceLength(16), \
mas01cr@105 274 sequenceHop(1), \
mas01cr@105 275 queryPoint(0), \
mas01cr@105 276 usingQueryPoint(0), \
mas01cr@105 277 usingTimes(0), \
mas01cr@193 278 usingPower(0), \
mas01cr@105 279 isClient(0), \
mas01cr@105 280 isServer(0), \
mas01cr@105 281 port(0), \
mas01cr@105 282 timesTol(0.1), \
mas01cr@193 283 radius(0), \
mas01cr@193 284 use_absolute_threshold(false), \
mas01cr@193 285 absolute_threshold(0.0), \
mas01cr@193 286 use_relative_threshold(false), \
mas01cr@193 287 relative_threshold(0.0)