annotate audioDB.h @ 601:82d23418d867

Fix some fd leaks in the command-line binary. Strictly speaking, they're not really leaks, because the only codepath that suffers from these leaks exits immediately afterwards. On the other hand, this fix makes valgrind on e.g. tests/0025 happier, going from 5 errors to none.
author mas01cr
date Fri, 14 Aug 2009 16:39:32 +0000
parents 4eedc18634f5
children 6d5d4c733781
rev   line source
mas01mc@292 1 #ifndef __AUDIODB_H_
mas01mc@292 2 #define __AUDIODB_H_
mas01mc@292 3
mas01cr@0 4 #include <stdio.h>
mas01cr@0 5 #include <stdlib.h>
mas01cr@0 6 #include <sys/types.h>
mas01cr@0 7 #include <sys/stat.h>
mas01cr@0 8 #include <sys/mman.h>
mas01cr@0 9 #include <fcntl.h>
mas01cr@0 10 #include <string.h>
mas01cr@0 11 #include <iostream>
mas01cr@0 12 #include <fstream>
mas01cr@302 13 #include <set>
mas01cr@498 14 #include <map>
mas01cr@302 15 #include <string>
mas01cr@0 16 #include <math.h>
mas01cr@0 17 #include <sys/time.h>
mas01cr@0 18 #include <assert.h>
mas01cr@62 19 #include <float.h>
mas01cr@104 20 #include <signal.h>
mas01cr@280 21 #include <gsl/gsl_rng.h>
mas01cr@0 22
mas01mc@292 23 // includes for LSH indexing
mas01cr@498 24 extern "C" {
mas01cr@498 25 #include "audioDB_API.h"
mas01cr@498 26 }
mas01cr@509 27 #include "audioDB-internals.h"
mas01mc@292 28 #include "ReporterBase.h"
mas01cr@498 29 #include "accumulator.h"
mas01mc@292 30 #include "lshlib.h"
mas01mc@292 31
mas01cr@0 32 // includes for web services
mas01cr@0 33 #include "soapH.h"
mas01cr@0 34 #include "cmdline.h"
mas01cr@0 35
// Maximum string length used by this front end; alias for ADB_MAXSTR, which
// is defined outside this file.
mas01cr@509 36 #define MAXSTR ADB_MAXSTR
mas01cr@0 37
mas01cr@0 38 // Database PRIMARY commands
// One string literal per top-level action. O2_ACTION() (below) compares its
// argument against `command` with strcmp; presumably these literals are what
// gets passed to it -- confirm in the caller.
mas01cr@0 39 #define COM_CREATE "--NEW"
mas01cr@0 40 #define COM_INSERT "--INSERT"
mas01cr@0 41 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 42 #define COM_QUERY "--QUERY"
mas01cr@0 43 #define COM_STATUS "--STATUS"
mas01cr@0 44 #define COM_L2NORM "--L2NORM"
mas01cr@193 45 #define COM_POWER "--POWER"
mas01cr@0 46 #define COM_DUMP "--DUMP"
mas01cr@0 47 #define COM_SERVER "--SERVER"
mas01mc@292 48 #define COM_INDEX "--INDEX"
mas01cr@280 49 #define COM_SAMPLE "--SAMPLE"
mas01mc@334 50 #define COM_LISZT "--LISZT"
mas01cr@0 51
mas01cr@0 52 // parameters
// Option-name strings for the per-command parameters.
// NOTE(review): COM_TRACKNN expands to "--resultlength", and COM_QUERYKEY to
// "--key"; the macro names and the option text differ for these two.
mas01cr@0 53 #define COM_CLIENT "--client"
mas01cr@0 54 #define COM_DATABASE "--database"
mas01cr@0 55 #define COM_QTYPE "--qtype"
mas01cr@0 56 #define COM_SEQLEN "--sequencelength"
mas01cr@0 57 #define COM_SEQHOP "--sequencehop"
mas01cr@0 58 #define COM_POINTNN "--pointnn"
mas01mc@307 59 #define COM_RADIUS "--radius"
mas01mc@18 60 #define COM_TRACKNN "--resultlength"
mas01cr@0 61 #define COM_QPOINT "--qpoint"
mas01cr@0 62 #define COM_FEATURES "--features"
mas01cr@0 63 #define COM_QUERYKEY "--key"
mas01cr@0 64 #define COM_KEYLIST "--keyList"
mas01cr@0 65 #define COM_TIMES "--times"
mas01cr@193 66 #define COM_QUERYPOWER "--power"
mas01cr@193 67 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@193 68 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01mc@310 69 #define COM_EXHAUSTIVE "--exhaustive"
mas01mc@310 70 #define COM_LSH_EXACT "--lsh_exact"
mas01mc@471 71 #define COM_NO_UNIT_NORMING "--no_unit_norming"
mas01cr@0 72
// Default nearest-neighbour counts; O2_AUDIODB_INITIALIZERS uses them as the
// initial values of pointNN and trackNN.
mas01cr@0 73 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 74 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 75
// Default table size in bytes; the 2GB value is the active definition, the
// 4GB one is commented out.
mas01mc@248 76 //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
mas01mc@7 77 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01cr@0 78
// Defaults for database creation; O2_AUDIODB_INITIALIZERS uses them as the
// initial values of datasize, ntracks and datadim.
mas01cr@509 79 #define O2_DEFAULT_DATASIZE (1355U) /* in MB */
mas01cr@509 80 #define O2_DEFAULT_NTRACKS (20000U)
mas01cr@509 81 #define O2_DEFAULT_DATADIM (9U)
mas01mc@295 82
mas01mc@295 83 // LIMIT PARAMETERS
// O2_REALTYPE expands to the parenthesised token sequence "(double)", so it
// reads as a cast when written in front of an expression; it is not a plain
// type name.
mas01mc@292 84 #define O2_REALTYPE (double)
mas01mc@324 85 #define O2_MAXFILES (1000000U)
// O2_MAXFILESTR and O2_FILETABLE_ENTRY_SIZE are two names for the same
// constant, ADB_FILETABLE_ENTRY_SIZE (defined outside this file).
mas01cr@509 86 #define O2_MAXFILESTR ADB_FILETABLE_ENTRY_SIZE
mas01cr@509 87 #define O2_FILETABLE_ENTRY_SIZE ADB_FILETABLE_ENTRY_SIZE
mas01cr@509 88 #define O2_TRACKTABLE_ENTRY_SIZE ADB_TRACKTABLE_ENTRY_SIZE
// Size of the on-disk header type dbTableHeaderT, which is declared outside
// this file.
mas01cr@0 89 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 90 #define O2_MEANNUMVECTORS (1000U)
mas01mc@464 91 #define O2_MAXDIM (20000U)
mas01mc@263 92 #define O2_MAXNN (1000000U)
mas01mc@292 93 #define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence
mas01mc@324 94 #define O2_MAXTRACKS (1000000U) // maximum number of tracks
mas01mc@534 95
// Limits derived from the constants above.
// O2_REALTYPE expands to the parenthesised "(double)", so it has to follow
// sizeof directly: "sizeof(O2_REALTYPE)" would expand to "sizeof((double))",
// which is not a valid expression and fails to compile wherever the macro is
// used. "sizeof O2_REALTYPE" expands to "sizeof (double)".
// With an 8-byte double this is 8 * 8000 * 8000 = 512,000,000 bytes.
mas01mc@292 96 #define O2_MAXDOTPRODUCTMEMORY (sizeof O2_REALTYPE*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB
mas01mc@324 97 #define O2_SERIAL_MAX_TRACKBATCH (1000000)
mas01mc@324 98 #define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
mas01mc@324 99 #define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally
mas01mc@324 100 #define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS )
mas01cr@0 101
mas01cr@0 102 // Flags
// Header flag bits. All except O2_FLAG_MINMAX alias ADB_HEADER_FLAG_*
// constants defined outside this file; O2_FLAG_MINMAX is a literal 0x2U.
mas01cr@509 103 #define O2_FLAG_L2NORM ADB_HEADER_FLAG_L2NORM
mas01cr@0 104 #define O2_FLAG_MINMAX (0x2U)
mas01cr@509 105 #define O2_FLAG_POWER ADB_HEADER_FLAG_POWER
mas01cr@509 106 #define O2_FLAG_TIMES ADB_HEADER_FLAG_TIMES
mas01cr@509 107 #define O2_FLAG_LARGE_ADB ADB_HEADER_FLAG_REFERENCES
// Yields the string "on" when x is non-zero and "off" otherwise.
mas01mc@301 108 #define DISPLAY_FLAG(x) (x?"on":"off")
mas01cr@0 109
mas01cr@105 110 // Query types
// Distinct single-bit codes from 0x4 to 0x40. O2_POINT_QUERY is the initial
// value of queryType in O2_AUDIODB_INITIALIZERS.
mas01cr@105 111 #define O2_POINT_QUERY (0x4U)
mas01cr@105 112 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 113 #define O2_TRACK_QUERY (0x10U)
mas01mc@248 114 #define O2_N_SEQUENCE_QUERY (0x20U)
mas01mc@263 115 #define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
mas01mc@248 116
mas01cr@0 117 // Error Codes
mas01cr@0 118 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 119
mas01cr@0 120 // Macros
// True when the C string `command` equals `a`. `command` is not a macro
// parameter: a variable of that name must be in scope where this expands
// (class audioDB has a member called command).
mas01cr@0 121 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 122
// Round x up / down to a multiple of 2^w.
// The shift count w is parenthesised so that an expression argument containing
// an operator of lower precedence than << (for example a | b) is shifted as a
// whole. (1<<(w)) is an int shift, so w must be smaller than the width of int.
// ALIGN_UP expands w twice; do not pass an argument with side effects.
mas01cr@370 123 #define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
mas01cr@108 124 #define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
mas01cr@108 125
// Round x up / down to a multiple of getpagesize(). The mask arithmetic is
// only correct when the page size is a power of two. ALIGN_PAGE_UP calls
// getpagesize() twice per expansion.
mas01cr@370 126 #define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1))
mas01cr@196 127 #define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))
mas01cr@196 128
// Substitute the empty string for a null pointer. x is expanded twice, so it
// is evaluated twice when it is non-null.
mas01cr@166 129 #define ENSURE_STRING(x) ((x) ? (x) : "")
mas01cr@166 130
// Map `length` bytes of the file open on `dbfid`, starting at file offset
// `start`, as a MAP_SHARED mapping and store the address in `var` cast to
// `type`. The mapping is readable, and also writable when `forWrite` is true.
// If mmap returns (void *) -1, error() is called with the name of `var`;
// error() is declared noreturn in class audioDB, which is why the assignment
// that follows is not guarded.
// `dbfid`, `forWrite` and `error` are not parameters: they must be in scope at
// the expansion site (they are members of class audioDB).
mas01cr@239 131 #define CHECKED_MMAP(type, var, start, length) \
mas01cr@239 132 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
mas01cr@239 133 if(tmp == (void *) -1) { \
mas01cr@239 134 error("mmap error for db table", #var, "mmap"); \
mas01cr@239 135 } \
mas01cr@239 136 var = (type) tmp; \
mas01cr@239 137 }
mas01cr@239 138
// Read exactly `count` bytes from `fd` into `buf` with a single read() call.
// A return of -1 is reported through error("read error", ...); any other
// return that differs from `count` is reported as "short read". The read is
// not retried, so a partial read is treated as an error.
// `count` is evaluated once (copied into tmpcount). The block declares locals
// named tmp and tmpcount, which would shadow identically named variables used
// in the arguments. `error` must be in scope at the expansion site.
mas01cr@370 139 #define CHECKED_READ(fd, buf, count) \
mas01cr@370 140 { size_t tmpcount = count; \
mas01cr@370 141 ssize_t tmp = read(fd, buf, tmpcount); \
mas01cr@370 142 if(tmp == -1) { \
mas01cr@370 143 error("read error", "", "read"); \
mas01cr@370 144 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 145 error("short read", ""); \
mas01cr@370 146 } \
mas01cr@370 147 }
mas01cr@370 148
// Write exactly `count` bytes from `buf` to `fd` with a single write() call.
// A return of -1 is reported through error("write error", ...); any other
// return that differs from `count` is reported as "short write". The write is
// not retried, so a partial write is treated as an error.
// `count` is evaluated once (copied into tmpcount). The block declares locals
// named tmp and tmpcount, which would shadow identically named variables used
// in the arguments. `error` must be in scope at the expansion site.
mas01cr@370 149 #define CHECKED_WRITE(fd, buf, count) \
mas01cr@370 150 { size_t tmpcount = count; \
mas01cr@370 151 ssize_t tmp = write(fd, buf, tmpcount); \
mas01cr@370 152 if(tmp == -1) { \
mas01cr@370 153 error("write error", "", "write"); \
mas01cr@370 154 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 155 error("short write", ""); \
mas01cr@370 156 } \
mas01cr@370 157 }
mas01cr@370 158
// printf-style logging to stderr, emitted only when `verbosity` is strictly
// greater than `vv`; stderr is flushed after each message. `verbosity` is not
// a parameter and must be in scope at the expansion site (it is a member of
// class audioDB).
// NOTE(review): the expansion is a bare `if` statement, so an `else` written
// after VERB_LOG(...) in the caller would attach to this `if`. Callers are not
// visible here, so the form is left unchanged.
mas01cr@239 159 #define VERB_LOG(vv, ...) \
mas01cr@239 160 if(verbosity > vv) { \
mas01cr@239 161 fprintf(stderr, __VA_ARGS__); \
mas01cr@239 162 fflush(stderr); \
mas01cr@239 163 }
mas01cr@0 164
mas01mc@324 165 // We will only use this in a 32-bit address space
mas01mc@324 166 // So map the off_t down to 32-bits first
// Copies STR into TABLE at byte offset dbH->numFiles * O2_FILETABLE_ENTRY_SIZE,
// i.e. into the slot for the next file number. `dbH` is not a parameter and
// must be in scope at the expansion site.
// NOTE(review): the copy length is strlen(STR), so no terminating NUL is
// written and the copy is not bounded by the slot size. This only works if the
// slot is already zero-filled and callers limit the string length -- confirm
// both.
// NOTE(review): nothing in the macro narrows an off_t, so the two comment
// lines above look stale; the expansion also ends in its own ';'.
mas01mc@324 167 #define INSERT_FILETABLE_STRING(TABLE, STR) \
mas01mc@324 168 strncpy(TABLE + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR));
mas01mc@324 169
// Delete a heap object (SAFE_DELETE) or array (SAFE_DELETE_ARRAY) and set the
// pointer to null so a later delete of the same pointer is harmless.
// The two statements are wrapped in a brace block so they stay together when
// the macro is the sole body of an if/for/while; as two bare statements only
// the delete was governed by the condition and "PTR=0" always ran.
// Both "SAFE_DELETE(p);" and "SAFE_DELETE(p)" remain valid at statement level.
// PTR is expanded twice; do not pass an expression with side effects.
mas01mc@324 170 #define SAFE_DELETE(PTR) { delete PTR; PTR=0; }
mas01mc@324 171 #define SAFE_DELETE_ARRAY(PTR) { delete[] PTR; PTR=0; }
mas01mc@324 172
// Global strings defined in another translation unit.
// NOTE(review): presumably the root directories for databases and feature
// files in server mode (compare the adb_root / adb_feature_root members of
// class audioDB) -- confirm against the definition.
mas01mc@324 173 extern char* SERVER_ADB_ROOT;
mas01mc@324 174 extern char* SERVER_ADB_FEATURE_ROOT;
mas01mc@308 175
// Front-end object for one audioDB invocation.
// Holds the parsed command-line options (args_info), file names and file
// descriptors, pointers to the database tables with their lengths, query
// parameters, and the LSH indexing parameters.
// Every constructor takes argc/argv; three overloads additionally take a web
// service response object (query, status, liszt), presumably filled with the
// result of the command -- confirm in the implementation.
// Data members are expected to be initialised through the
// O2_AUDIODB_INITIALIZERS list defined after this class.
mas01mc@308 176 class audioDB{
mas01cr@0 177 private:
mas01cr@0 178 gengetopt_args_info args_info;
mas01cr@0 179 unsigned dim;
mas01cr@0 180 const char *dbName;
mas01cr@0 181 const char *inFile;
mas01cr@0 182 const char *hostport;
mas01cr@0 183 const char *key;
mas01mc@18 184 const char* trackFileName;
mas01cr@239 185 std::ifstream *trackFile;
mas01cr@0 186 const char *command;
mas01cr@131 187 const char *output;
mas01cr@0 188 const char *timesFileName;
mas01cr@239 189 std::ifstream *timesFile;
mas01cr@193 190 const char *powerFileName;
mas01cr@239 191 std::ifstream *powerFile;
mas01mc@324 192 const char* adb_root;
mas01mc@324 193 const char* adb_feature_root;
mas01mc@324 194
// File descriptors and related state. CHECKED_MMAP reads dbfid and forWrite
// by name.
mas01cr@193 195 int powerfd;
mas01cr@0 196 int dbfid;
mas01mc@292 197 int lshfid;
mas01cr@196 198 bool forWrite;
mas01cr@0 199 int infid;
mas01cr@0 200 struct stat statbuf;
mas01cr@509 201 struct adbheader *dbH;
mas01cr@498 202 struct adb *adb;
mas01cr@284 203
mas01cr@284 204 gsl_rng *rng;
mas01cr@0 205
// Pointers to the database tables, followed by file-name tables and the
// table lengths. How they are populated is not visible in this header
// (presumably via CHECKED_MMAP -- confirm).
mas01mc@324 206 char* fileTable;
mas01mc@18 207 unsigned* trackTable;
mas01cr@0 208 double* l2normTable;
mas01cr@196 209 double* timesTable;
mas01cr@193 210 double* powerTable;
mas01cr@0 211
mas01mc@324 212 char* featureFileNameTable;
mas01mc@324 213 char* timesFileNameTable;
mas01mc@324 214 char* powerFileNameTable;
mas01mc@324 215
mas01cr@196 216 size_t fileTableLength;
mas01cr@196 217 size_t trackTableLength;
mas01cr@196 218 size_t timesTableLength;
mas01cr@196 219 size_t powerTableLength;
mas01cr@196 220 size_t l2normTableLength;
mas01cr@196 221
mas01cr@0 222 // Flags and parameters
mas01cr@0 223 unsigned verbosity; // how much do we want to know?
mas01cr@256 224
mas01cr@280 225 unsigned nsamples;
mas01cr@280 226
mas01cr@256 227 //off_t size; // given size (for creation)
mas01cr@256 228 unsigned datasize; // size in MB
mas01cr@256 229 unsigned ntracks;
mas01cr@256 230 unsigned datadim;
mas01cr@256 231
mas01cr@0 232 unsigned queryType; // point queries default
mas01cr@0 233 unsigned pointNN; // how many point NNs ?
mas01mc@18 234 unsigned trackNN; // how many track NNs ?
mas01cr@0 235 unsigned sequenceLength;
mas01cr@0 236 unsigned sequenceHop;
mas01cr@239 237 bool normalizedDistance;
mas01mc@292 238 bool no_unit_norming;
mas01cr@0 239 unsigned queryPoint;
mas01cr@0 240 unsigned usingQueryPoint;
mas01cr@0 241 unsigned usingTimes;
mas01cr@193 242 unsigned usingPower;
mas01cr@0 243 unsigned isClient;
mas01cr@0 244 unsigned isServer;
mas01cr@0 245 unsigned port;
mas01cr@0 246 double timesTol;
mas01mc@17 247 double radius;
mas01mc@292 248 bool query_from_key;
mas01mc@292 249 Uns32T query_from_key_index;
mas01cr@193 250 bool use_absolute_threshold;
mas01cr@193 251 double absolute_threshold;
mas01cr@193 252 bool use_relative_threshold;
mas01cr@193 253 double relative_threshold;
mas01mc@334 254
mas01mc@292 255 ReporterBase* reporter; // track/point reporter
mas01mc@292 256
mas01mc@334 257 // LISZT parameters
mas01mc@334 258 unsigned lisztOffset;
mas01mc@334 259 unsigned lisztLength;
mas01mc@334 260
mas01cr@0 261 // private methods
// error() is declared noreturn. CHECKED_MMAP assigns the mmap result
// immediately after calling error(), so it depends on error() not returning.
mas01cr@572 262 void error(const char* a, const char* b = "", const char *sysFunc = 0) __attribute__ ((noreturn));
mas01cr@193 263
mas01cr@498 264 void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
mas01cr@284 265 void initRNG();
mas01cr@196 266 void initDBHeader(const char *dbName);
mas01cr@498 267 void initInputFile(const char *inFile);
mas01mc@292 268 void initTables(const char* dbName, const char* inFile = 0);
mas01mc@292 269 void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
mas01mc@324 270 void prefix_name(char** const name, const char* prefix);
mas01mc@324 271
mas01cr@0 272 public:
// Constructors: plain command line, then the query / status / liszt web
// service variants that receive a response object.
mas01cr@370 273 audioDB(const unsigned argc, const char *argv[]);
mas01cr@508 274 audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse);
mas01cr@370 275 audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
mas01cr@548 276 audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse);
mas01mc@334 277
mas01cr@97 278 void cleanup();
mas01cr@0 279 ~audioDB();
mas01cr@370 280 int processArgs(const unsigned argc, const char* argv[]);
// One method per primary command. The response-object parameters default to
// 0, so the same methods serve both the command-line and web service paths.
mas01cr@0 281 void create(const char* dbName);
mas01cr@0 282 void insert(const char* dbName, const char* inFile);
mas01cr@0 283 void batchinsert(const char* dbName, const char* inFile);
mas01cr@508 284 void query(const char* dbName, const char* inFile, struct soap *soap=0, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 285 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01ik@355 286
mas01cr@284 287 unsigned random_track(unsigned *propTable, unsigned total);
mas01cr@280 288 void sample(const char *dbName);
mas01cr@0 289 void l2norm(const char* dbName);
mas01cr@193 290 void power_flag(const char *dbName);
mas01cr@0 291 void dump(const char* dbName);
mas01cr@548 292 void liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap=0, adb__lisztResponse* adbLisztResponse=0);
mas01cr@0 293
mas01mc@292 294 // LSH indexing parameters and data structures
// NOTE(review): these data members sit in the public section.
mas01mc@292 295 LSH* lsh;
mas01mc@292 296 bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false)
mas01mc@292 297 bool lsh_use_u_functions;
mas01mc@292 298 bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH
mas01mc@308 299 bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident
mas01mc@292 300 double lsh_param_w; // Width of LSH hash-function bins
mas01mc@292 301 Uns32T lsh_param_k; // Number of independent hash functions
mas01mc@292 302 Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables
mas01mc@292 303 Uns32T lsh_param_N; // Number of rows per hash table
mas01mc@292 304 Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
mas01mc@292 305 Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row
mas01mc@292 306
mas01mc@292 307 // LSH indexing and retrieval methods
mas01mc@292 308 void index_index_db(const char* dbName);
mas01mc@292 309 void index_initialize(double**,double**,double**,double**,unsigned int*);
mas01mc@292 310 void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@292 311 int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
mas01mc@292 312 Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
mas01cr@498 313 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01mc@324 314 void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@324 315
mas01mc@292 316 // Web Services
mas01cr@0 317 void startServer();
mas01ik@355 318
mas01mc@308 319 void ws_status(const char*dbName, char* hostport);
mas01mc@308 320 void ws_query(const char*dbName, const char *featureFileName, const char* hostport);
mas01mc@328 321 void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport);
mas01mc@334 322 void ws_liszt(const char* dbName, char* hostport);
mas01mc@334 323
mas01cr@0 324 };
mas01mc@17 325
// Member-initializer list for class audioDB, written once so that every
// constructor can share it (presumably as
// "audioDB::audioDB(...) : O2_AUDIODB_INITIALIZERS" -- confirm in the
// implementation). Entries follow the member declaration order of the class.
// hostport and lsh_param_w are declared in the class but were missing from
// this list, which left them holding indeterminate values until something
// assigned them; they are now initialised to 0 / 0.0 like their neighbours.
// args_info and statbuf are still not listed here.
mas01mc@292 326 #define O2_AUDIODB_INITIALIZERS \
mas01mc@292 327 dim(0), \
mas01mc@292 328 dbName(0), \
mas01mc@292 329 inFile(0), hostport(0), \
mas01mc@292 330 key(0), \
mas01mc@292 331 trackFileName(0), \
mas01mc@292 332 trackFile(0), \
mas01mc@292 333 command(0), \
mas01mc@292 334 output(0), \
mas01mc@292 335 timesFileName(0), \
mas01mc@292 336 timesFile(0), \
mas01mc@292 337 powerFileName(0), \
mas01mc@292 338 powerFile(0), \
mas01mc@324 339 adb_root(0), \
mas01mc@324 340 adb_feature_root(0), \
mas01mc@324 341 powerfd(0), \
mas01mc@292 342 dbfid(0), \
mas01mc@292 343 lshfid(0), \
mas01mc@292 344 forWrite(false), \
mas01mc@292 345 infid(0), \
mas01mc@292 346 dbH(0), \
mas01cr@498 347 adb(0), \
mas01mc@292 348 rng(0), \
mas01mc@292 349 fileTable(0), \
mas01mc@292 350 trackTable(0), \
mas01mc@292 351 l2normTable(0), \
mas01mc@292 352 timesTable(0), \
mas01mc@314 353 powerTable(0), \
mas01mc@324 354 featureFileNameTable(0), \
mas01mc@324 355 timesFileNameTable(0), \
mas01mc@324 356 powerFileNameTable(0), \
mas01mc@292 357 fileTableLength(0), \
mas01mc@292 358 trackTableLength(0), \
mas01mc@292 359 timesTableLength(0), \
mas01mc@292 360 powerTableLength(0), \
mas01mc@292 361 l2normTableLength(0), \
mas01mc@292 362 verbosity(1), \
mas01mc@292 363 nsamples(2000), \
mas01mc@292 364 datasize(O2_DEFAULT_DATASIZE), \
mas01mc@292 365 ntracks(O2_DEFAULT_NTRACKS), \
mas01mc@292 366 datadim(O2_DEFAULT_DATADIM), \
mas01mc@292 367 queryType(O2_POINT_QUERY), \
mas01mc@292 368 pointNN(O2_DEFAULT_POINTNN), \
mas01mc@292 369 trackNN(O2_DEFAULT_TRACKNN), \
mas01mc@292 370 sequenceLength(16), \
mas01mc@292 371 sequenceHop(1), \
mas01mc@292 372 normalizedDistance(true), \
mas01mc@292 373 no_unit_norming(false), \
mas01mc@292 374 queryPoint(0), \
mas01mc@292 375 usingQueryPoint(0), \
mas01mc@292 376 usingTimes(0), \
mas01mc@292 377 usingPower(0), \
mas01mc@292 378 isClient(0), \
mas01mc@292 379 isServer(0), \
mas01mc@292 380 port(0), \
mas01mc@292 381 timesTol(0.1), \
mas01mc@292 382 radius(0), \
mas01mc@292 383 query_from_key(false), \
mas01cr@498 384 query_from_key_index((uint32_t) -1), \
mas01mc@292 385 use_absolute_threshold(false), \
mas01mc@292 386 absolute_threshold(0.0), \
mas01mc@292 387 use_relative_threshold(false), \
mas01mc@292 388 relative_threshold(0.0), \
mas01mc@292 389 reporter(0), \
mas01mc@334 390 lisztOffset(0), \
mas01mc@334 391 lisztLength(0), \
mas01mc@292 392 lsh(0), \
mas01mc@292 393 lsh_in_core(false), \
mas01mc@292 394 lsh_use_u_functions(false), \
mas01mc@292 395 lsh_exact(false), \
mas01mc@308 396 WS_load_index(false), lsh_param_w(0.0), \
mas01mc@292 397 lsh_param_k(0), \
mas01mc@292 398 lsh_param_m(0), \
mas01mc@292 399 lsh_param_N(0), \
mas01mc@292 400 lsh_param_b(0), \
mas01cr@498 401 lsh_param_ncols(0)
mas01mc@292 402 #endif