annotate audioDB.h @ 279:dee55886eca0 sampling

make the RNG a part of the audioDB object. Easier to deal with memory discipline and initialization (though note the FIXME comment in audioDB::initTables()). Also initialize the RNG from the current time. A mature implementation would use a proper source of entropy...
author mas01cr
date Wed, 02 Jul 2008 13:53:23 +0000
parents d9dba57becd4
children
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@278 15 #include <gsl/gsl_rng.h>
mas01cr@0 16
mas01cr@0 17 // includes for web services
mas01cr@0 18 #include "soapH.h"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
// General-purpose string length limit; its users are outside this header.
mas01cr@0 21 #define MAXSTR 512
mas01cr@0 22
// Spellings of the top-level actions. NOTE(review): presumably compared against
// the `command` member through O2_ACTION() - confirm in the .cpp file.
mas01cr@0 23 // Database PRIMARY commands
mas01cr@0 24 #define COM_CREATE "--NEW"
mas01cr@0 25 #define COM_INSERT "--INSERT"
mas01cr@0 26 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 27 #define COM_QUERY "--QUERY"
mas01cr@0 28 #define COM_STATUS "--STATUS"
mas01cr@0 29 #define COM_L2NORM "--L2NORM"
mas01cr@193 30 #define COM_POWER "--POWER"
mas01cr@0 31 #define COM_DUMP "--DUMP"
mas01cr@0 32 #define COM_SERVER "--SERVER"
mas01cr@266 33 #define COM_SAMPLE "--SAMPLE"
mas01cr@0 34
// Spellings of the option names that accompany the primary commands.
// Note that COM_TRACKNN is spelled "--resultlength", not "--tracknn".
mas01cr@0 35 // parameters
mas01cr@0 36 #define COM_CLIENT "--client"
mas01cr@0 37 #define COM_DATABASE "--database"
mas01cr@0 38 #define COM_QTYPE "--qtype"
mas01cr@0 39 #define COM_SEQLEN "--sequencelength"
mas01cr@0 40 #define COM_SEQHOP "--sequencehop"
mas01cr@0 41 #define COM_POINTNN "--pointnn"
mas01mc@18 42 #define COM_TRACKNN "--resultlength"
mas01cr@0 43 #define COM_QPOINT "--qpoint"
mas01cr@0 44 #define COM_FEATURES "--features"
mas01cr@0 45 #define COM_QUERYKEY "--key"
mas01cr@0 46 #define COM_KEYLIST "--keyList"
mas01cr@0 47 #define COM_TIMES "--times"
mas01cr@193 48 #define COM_QUERYPOWER "--power"
mas01cr@193 49 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@193 50 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01cr@0 51
// Magic numbers: four ASCII characters packed into one integer, first character
// in the lowest byte. The old magic is upper case ("O2DB"), the current one is
// lower case ("o2db"). NOTE(review): presumably matched against
// dbTableHeader.magic and .version - confirm in the .cpp file.
mas01cr@108 52 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
mas01cr@108 53 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
mas01cr@210 54 #define O2_FORMAT_VERSION (4U)
mas01cr@0 55
// Defaults used for the pointNN / trackNN members (see O2_AUDIODB_INITIALIZERS).
mas01cr@0 56 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 57 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 58
mas01mc@248 59 //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
mas01mc@7 60 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01cr@0 61
// Defaults used for the datasize / ntracks / datadim members
// (see O2_AUDIODB_INITIALIZERS).
mas01cr@256 62 #define O2_DEFAULT_DATASIZE (1355U) // in MB
mas01cr@256 63 #define O2_DEFAULT_NTRACKS (20000U)
mas01cr@256 64 #define O2_DEFAULT_DATADIM (9U)
mas01cr@256 65
// Table sizing limits. A file-table entry is O2_MAXFILESTR bytes and a
// track-table entry is one `unsigned`; O2_HEADERSIZE is the size of the
// dbTableHeaderT struct declared later in this header.
mas01cr@239 66 #define O2_MAXFILES (20000U)
mas01cr@0 67 #define O2_MAXFILESTR (256U)
mas01cr@256 68 #define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR)
mas01cr@256 69 #define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned))
mas01cr@0 70 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 71 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 72 #define O2_MAXDIM (1000U)
mas01mc@263 73 #define O2_MAXNN (1000000U)
mas01cr@0 74
// Single-bit flag values. NOTE(review): presumably OR-ed into
// dbTableHeader.flags - confirm in the .cpp file.
mas01cr@0 75 // Flags
mas01cr@0 76 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 77 #define O2_FLAG_MINMAX (0x2U)
mas01cr@193 78 #define O2_FLAG_POWER (0x4U)
mas01cr@0 79 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 80
// Values for the queryType member; O2_POINT_QUERY is its default
// (see O2_AUDIODB_INITIALIZERS).
mas01cr@105 81 // Query types
mas01cr@105 82 #define O2_POINT_QUERY (0x4U)
mas01cr@105 83 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 84 #define O2_TRACK_QUERY (0x10U)
mas01mc@248 85 #define O2_N_SEQUENCE_QUERY (0x20U)
mas01mc@263 86 #define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
mas01mc@248 87
mas01cr@105 88
// NOTE(review): presumably the "not found" return value of getKeyPos() - confirm.
mas01cr@0 89 // Error Codes
mas01cr@0 90 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 91
mas01cr@0 92 // Macros
// True when the string `a` equals `command`. Relies on a variable named
// `command` being in scope wherever the macro is expanded.
mas01cr@0 93 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 94
// Round x up to the next multiple of 2^w. `w` is a bit count (log2 of the
// alignment), not a byte count. The mask is built from the int constant 1, so
// w must be smaller than the number of value bits in an int.
// Both arguments are fully parenthesized so that compound expressions such as
// ALIGN_UP(n, a|b) or ALIGN_UP(n, c ? 3 : 4) expand with the intended precedence.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
// Round x down to the previous multiple of 2^w (same conventions as ALIGN_UP).
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
mas01cr@108 97
// Round x up / down to a multiple of the page size reported by getpagesize().
// getpagesize() is called on every expansion (twice in ALIGN_PAGE_UP).
// NOTE(review): ALIGN_PAGE_UP relies on '+' binding tighter than '&'; the
// result is correct, but explicit parentheses around the sum would silence
// -Wparentheses and make the intent obvious.
mas01cr@196 98 #define ALIGN_PAGE_UP(x) ((x) + (getpagesize()-1) & ~(getpagesize()-1))
mas01cr@196 99 #define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))
mas01cr@196 100
// Yield x itself when it is non-null, otherwise the empty string literal "".
// x is evaluated twice, so it should be free of side effects.
mas01cr@166 101 #define ENSURE_STRING(x) ((x) ? (x) : "")
mas01cr@166 102
// Map `length` bytes of the database file, starting at file offset `start`,
// and store the mapping in `var` converted to `type`.
//   type   - pointer type of `var`
//   var    - lvalue receiving the mapping; its name is also passed (stringified)
//            to error() when the mapping fails
//   start  - file offset handed to mmap()
//   length - number of bytes to map
// The mapping is MAP_SHARED, readable, and additionally writable when
// `forWrite` is true. Expands to a braced block and relies on `dbfid`,
// `forWrite` and error() being in scope at the point of use.
// If error() returns, `var` is still assigned and receives MAP_FAILED.
// The temporary has a macro-specific name so that it cannot capture a caller's
// variable appearing in `start` or `length`.
#define CHECKED_MMAP(type, var, start, length) \
  { void *checked_mmap_result = mmap(0, (length), (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
    if(checked_mmap_result == MAP_FAILED) { \
      error("mmap error for db table", #var, "mmap"); \
    } \
    var = (type) checked_mmap_result; \
  }
mas01cr@239 110
// Print a printf-style message to stderr, then flush it, when the `verbosity`
// variable in scope is strictly greater than the level `vv`.
//   vv  - threshold level; parenthesized so compound expressions are safe
//   ... - format string followed by its arguments, forwarded to fprintf()
// Wrapped in do { } while(0) so that `VERB_LOG(...);` is exactly one statement
// and composes correctly with un-braced if/else. Invoke it with a trailing
// semicolon.
#define VERB_LOG(vv, ...) \
  do { \
    if(verbosity > (vv)) { \
      fprintf(stderr, __VA_ARGS__); \
      fflush(stderr); \
    } \
  } while(0)
mas01cr@0 116
// Header record describing a database: identification fields, counts, flags,
// and the offsets of the individual tables. O2_HEADERSIZE is sizeof this struct.
// NOTE(review): presumably stored at the start of the database file and
// accessed through the `dbH` member of audioDB - confirm in the .cpp file.
// NOTE(review): the off_t fields make the struct layout depend on how the build
// sizes off_t (e.g. large-file support) - confirm all builds agree.
mas01cr@210 117 typedef struct dbTableHeader {
// Identification: magic number and format version (see O2_MAGIC,
// O2_OLD_MAGIC and O2_FORMAT_VERSION).
mas01cr@114 118 uint32_t magic;
mas01cr@114 119 uint32_t version;
mas01cr@114 120 uint32_t numFiles;
mas01cr@114 121 uint32_t dim;
mas01cr@114 122 uint32_t flags;
mas01cr@210 123 uint32_t headerSize;
mas01cr@196 124 off_t length;
// Offsets of the individual tables (presumably byte offsets from the start of
// the file - TODO confirm).
mas01cr@196 125 off_t fileTableOffset;
mas01cr@196 126 off_t trackTableOffset;
mas01cr@196 127 off_t dataOffset;
mas01cr@196 128 off_t l2normTableOffset;
mas01cr@196 129 off_t timesTableOffset;
mas01cr@196 130 off_t powerTableOffset;
mas01cr@196 131 off_t dbSize;
mas01cr@0 132 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 133
mas01cr@239 134 class Reporter;
mas01cr@0 135
// Main application object. It holds the parsed command-line state, the file
// descriptors and table pointers for one database, the query parameters, and
// the operations (create, insert, query, status, sample, dump, ...) declared
// below. All method bodies live outside this header.
mas01cr@0 136 class audioDB{
mas01cr@0 137
mas01cr@0 138 private:
// Parsed command line (gengetopt structure from cmdline.h) and the string
// arguments taken from it.
mas01cr@0 139 gengetopt_args_info args_info;
mas01cr@0 140 unsigned dim;
mas01cr@0 141 const char *dbName;
mas01cr@0 142 const char *inFile;
mas01cr@0 143 const char *hostport;
mas01cr@0 144 const char *key;
mas01mc@18 145 const char* trackFileName;
mas01cr@239 146 std::ifstream *trackFile;
mas01cr@0 147 const char *command;
mas01cr@131 148 const char *output;
mas01cr@0 149 const char *timesFileName;
mas01cr@239 150 std::ifstream *timesFile;
mas01cr@193 151 const char *powerFileName;
mas01cr@239 152 std::ifstream *powerFile;
mas01cr@193 153 int powerfd;
mas01cr@0 154
// Database / input file state. `dbfid` and `forWrite` are the names the
// CHECKED_MMAP macro expects to find in scope.
mas01cr@0 155 int dbfid;
mas01cr@196 156 bool forWrite;
mas01cr@0 157 int infid;
mas01cr@0 158 char* db;
mas01cr@0 159 char* indata;
mas01cr@0 160 struct stat statbuf;
mas01cr@0 161 dbTableHeaderPtr dbH;
mas01cr@279 162
// GSL random number generator owned by this object; set up by initRNG().
mas01cr@279 163 gsl_rng *rng;
mas01cr@0 164
// Pointers to the individual tables and buffers.
// NOTE(review): presumably mmap()ed regions of the database file - confirm.
mas01cr@0 165 char *fileTable;
mas01mc@18 166 unsigned* trackTable;
mas01cr@0 167 double* dataBuf;
mas01cr@0 168 double* inBuf;
mas01cr@0 169 double* l2normTable;
mas01cr@196 170 double* timesTable;
mas01cr@193 171 double* powerTable;
mas01cr@0 172
// Lengths that go with the table pointers above (units not visible here;
// presumably bytes - TODO confirm).
mas01cr@196 173 size_t fileTableLength;
mas01cr@196 174 size_t trackTableLength;
mas01cr@196 175 off_t dataBufLength;
mas01cr@196 176 size_t timesTableLength;
mas01cr@196 177 size_t powerTableLength;
mas01cr@196 178 size_t l2normTableLength;
mas01cr@196 179
mas01cr@0 180 // Flags and parameters
mas01cr@0 181 unsigned verbosity; // how much do we want to know?
mas01cr@256 182
mas01cr@274 183 unsigned nsamples;
mas01cr@274 184
mas01cr@256 185 //off_t size; // given size (for creation)
mas01cr@256 186 unsigned datasize; // size in MB
mas01cr@256 187 unsigned ntracks;
mas01cr@256 188 unsigned datadim;
mas01cr@256 189
mas01cr@0 190 unsigned queryType; // point queries default
mas01cr@0 191 unsigned pointNN; // how many point NNs ?
mas01mc@18 192 unsigned trackNN; // how many track NNs ?
mas01cr@0 193 unsigned sequenceLength;
mas01cr@0 194 unsigned sequenceHop;
mas01cr@239 195 bool normalizedDistance;
mas01cr@0 196 unsigned queryPoint;
mas01cr@0 197 unsigned usingQueryPoint;
mas01cr@0 198 unsigned usingTimes;
mas01cr@193 199 unsigned usingPower;
mas01cr@0 200 unsigned isClient;
mas01cr@0 201 unsigned isServer;
mas01cr@0 202 unsigned port;
mas01cr@0 203 double timesTol;
mas01mc@17 204 double radius;
mas01cr@193 205
// Power thresholds; each value has a companion flag saying whether it is used.
mas01cr@193 206 bool use_absolute_threshold;
mas01cr@193 207 double absolute_threshold;
mas01cr@193 208 bool use_relative_threshold;
mas01cr@193 209 double relative_threshold;
mas01cr@193 210
mas01mc@17 211
mas01cr@0 212 // Timers
mas01cr@0 213 struct timeval tv1;
mas01cr@0 214 struct timeval tv2;
mas01cr@0 215
mas01cr@0 216 // private methods
// Error reporter used throughout (including by CHECKED_MMAP); `b` and
// `sysFunc` are optional.
mas01cr@32 217 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@193 218 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@193 219 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@193 220 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@193 221
// Helpers for the query implementation.
mas01cr@239 222 void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD);
mas01cr@239 223 void delete_arrays(int track, unsigned int numVectors, double **D, double **DD);
mas01cr@239 224 void read_data(int track, double **data_buffer_p, size_t *data_buffer_size_p);
mas01cr@239 225 void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
mas01cr@239 226 void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
mas01cr@240 227 void query_loop(const char* dbName, const char* inFile, Reporter *reporter);
mas01cr@0 228
// Initialization helpers and table maintenance.
mas01cr@279 229 void initRNG();
mas01cr@196 230 void initDBHeader(const char *dbName);
mas01cr@169 231 void initInputFile(const char *inFile);
mas01cr@196 232 void initTables(const char* dbName, const char* inFile);
mas01cr@0 233 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 234 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@239 235 void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
mas01cr@193 236 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 237 unsigned getKeyPos(char* key);
mas01cr@0 238 public:
mas01cr@0 239
// Constructors take the command line; the two extra overloads also receive a
// SOAP response structure (types from soapH.h).
mas01cr@76 240 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 241 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
mas01cr@133 242 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse);
mas01cr@97 243 void cleanup();
mas01cr@0 244 ~audioDB();
mas01cr@0 245 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 246 void get_lock(int fd, bool exclusive);
mas01cr@30 247 void release_lock(int fd);
// Database operations, roughly one per primary command (COM_* strings).
mas01cr@0 248 void create(const char* dbName);
mas01cr@0 249 void drop();
mas01cr@251 250 bool enough_per_file_space_free();
mas01cr@196 251 bool enough_data_space_free(off_t size);
mas01cr@196 252 void insert_data_vectors(off_t offset, void *buffer, size_t size);
mas01cr@0 253 void insert(const char* dbName, const char* inFile);
mas01cr@0 254 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 255 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 256 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@279 257 unsigned random_track(unsigned *propTable, unsigned total);
mas01cr@266 258 void sample(const char *dbName);
mas01cr@0 259 void ws_status(const char*dbName, char* hostport);
mas01mc@18 260 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 261 void l2norm(const char* dbName);
mas01cr@193 262 void power_flag(const char *dbName);
mas01cr@193 263 bool powers_acceptable(double p1, double p2);
mas01cr@0 264 void dump(const char* dbName);
mas01cr@0 265
mas01cr@0 266 // web services
mas01cr@0 267 void startServer();
mas01cr@0 268
mas01cr@0 269 };
mas01mc@17 270
// Member-initializer list shared by the audioDB constructors. It expands to a
// comma-separated list with no leading ':' and no trailing comma.
// Entries are kept in the same order as the member declarations in class
// audioDB, because members are initialized in declaration order regardless of
// the order written here.
// Every pointer member is given a null value so that it never starts out
// indeterminate; hostport, inBuf and powerTable were previously missing from
// the list. The struct members args_info, statbuf, tv1 and tv2 are
// intentionally not listed.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  powerFileName(0), \
  powerFile(0), \
  powerfd(0), \
  dbfid(0), \
  forWrite(false), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  rng(0), \
  fileTable(0), \
  trackTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  timesTable(0), \
  powerTable(0), \
  fileTableLength(0), \
  trackTableLength(0), \
  dataBufLength(0), \
  timesTableLength(0), \
  powerTableLength(0), \
  l2normTableLength(0), \
  verbosity(1), \
  nsamples(2000), \
  datasize(O2_DEFAULT_DATASIZE), \
  ntracks(O2_DEFAULT_NTRACKS), \
  datadim(O2_DEFAULT_DATADIM), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  normalizedDistance(true), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  usingPower(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0), \
  use_absolute_threshold(false), \
  absolute_threshold(0.0), \
  use_relative_threshold(false), \
  relative_threshold(0.0)