annotate audioDB.h @ 512:6439cfba2524 memory-leaks

Implemented correct LSH table via compile-time switch -DLSH_DUMP_CORE_TABLES. Dumps on LSH load.
author mas01mc
date Fri, 23 Jan 2009 18:45:44 +0000
parents eb9a7ef110c1
children
rev   line source
mas01mc@292 1 #ifndef __AUDIODB_H_
mas01mc@292 2 #define __AUDIODB_H_
mas01mc@292 3
mas01cr@0 4 #include <stdio.h>
mas01cr@0 5 #include <stdlib.h>
mas01cr@0 6 #include <sys/types.h>
mas01cr@0 7 #include <sys/stat.h>
mas01cr@0 8 #include <sys/mman.h>
mas01cr@0 9 #include <fcntl.h>
mas01cr@0 10 #include <string.h>
mas01cr@0 11 #include <iostream>
mas01cr@0 12 #include <fstream>
mas01cr@302 13 #include <set>
mas01cr@302 14 #include <string>
mas01cr@0 15 #include <math.h>
mas01cr@0 16 #include <sys/time.h>
mas01cr@0 17 #include <assert.h>
mas01cr@62 18 #include <float.h>
mas01cr@104 19 #include <signal.h>
mas01cr@280 20 #include <gsl/gsl_rng.h>
mas01cr@0 21
mas01mc@292 22 // includes for LSH indexing
mas01mc@292 23 #include "ReporterBase.h"
mas01mc@292 24 #include "lshlib.h"
mas01mc@292 25
mas01cr@0 26 // includes for web services
mas01cr@0 27 #include "soapH.h"
mas01cr@0 28 #include "cmdline.h"
mas01cr@0 29
// Not referenced anywhere else in this header.
mas01cr@0 30 #define MAXSTR 512
mas01cr@0 31
mas01cr@0 32 // Database PRIMARY commands
// Command-line switch strings; O2_ACTION(a), defined further down, compares `command` with one of them via strcmp.
mas01cr@0 33 #define COM_CREATE "--NEW"
mas01cr@0 34 #define COM_INSERT "--INSERT"
mas01cr@0 35 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 36 #define COM_QUERY "--QUERY"
mas01cr@0 37 #define COM_STATUS "--STATUS"
mas01cr@0 38 #define COM_L2NORM "--L2NORM"
mas01cr@193 39 #define COM_POWER "--POWER"
mas01cr@0 40 #define COM_DUMP "--DUMP"
mas01cr@0 41 #define COM_SERVER "--SERVER"
mas01mc@292 42 #define COM_INDEX "--INDEX"
mas01cr@280 43 #define COM_SAMPLE "--SAMPLE"
mas01mc@334 44 #define COM_LISZT "--LISZT"
mas01cr@0 45
mas01cr@0 46 // parameters
mas01cr@0 47 #define COM_CLIENT "--client"
mas01cr@0 48 #define COM_DATABASE "--database"
mas01cr@0 49 #define COM_QTYPE "--qtype"
mas01cr@0 50 #define COM_SEQLEN "--sequencelength"
mas01cr@0 51 #define COM_SEQHOP "--sequencehop"
mas01cr@0 52 #define COM_POINTNN "--pointnn"
mas01mc@307 53 #define COM_RADIUS "--radius"
// NOTE: the macro is named TRACKNN but the switch text is "--resultlength".
mas01mc@18 54 #define COM_TRACKNN "--resultlength"
mas01cr@0 55 #define COM_QPOINT "--qpoint"
mas01cr@0 56 #define COM_FEATURES "--features"
mas01cr@0 57 #define COM_QUERYKEY "--key"
mas01cr@0 58 #define COM_KEYLIST "--keyList"
mas01cr@0 59 #define COM_TIMES "--times"
mas01cr@193 60 #define COM_QUERYPOWER "--power"
mas01cr@193 61 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@193 62 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01mc@310 63 #define COM_EXHAUSTIVE "--exhaustive"
mas01mc@310 64 #define COM_LSH_EXACT "--lsh_exact"
mas01mc@471 65 #define COM_NO_UNIT_NORMING "--no_unit_norming"
mas01cr@0 66
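mas01mc@314 67 // Because LSH returns NN with P(1)<1 we want to return exact
mas01mc@314 68 // points above this boundary.
mas01mc@314 69 // Because we work in Radius^2 units,
mas01mc@314 70 // the sqrt of this number is the multiplier on the radius.
// NOTE(review): no constant follows this comment in this header, so "this number" has no referent here; confirm where the boundary multiplier is defined.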
mas01mc@314 71
// Magic numbers: four ASCII characters packed into one integer, first character in the least-significant byte.
mas01cr@108 72 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
mas01cr@108 73 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
mas01cr@210 74 #define O2_FORMAT_VERSION (4U)
mas01cr@0 75
// Initial values of the pointNN and trackNN members (see O2_AUDIODB_INITIALIZERS).
mas01cr@0 76 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 77 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 78
mas01mc@248 79 //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
mas01mc@7 80 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01cr@0 81
mas01mc@295 82 // Bit masks for packing (trackID,pointID) into 32-bit unsigned int
mas01mc@324 83 // This can be controlled at compile time
mas01mc@324 84 #define O2_DEFAULT_LSH_N_POINT_BITS 14
mas01mc@324 85
mas01mc@324 86 // Override the default point bit width for large database support
mas01mc@324 87 #ifndef LSH_N_POINT_BITS
mas01mc@324 88 #define LSH_N_POINT_BITS O2_DEFAULT_LSH_N_POINT_BITS
mas01mc@324 89 #endif
mas01mc@295 90
mas01mc@502 91 // Backwards-compatible nbits field size, control at compile time with -DLSH_N_POINT_BITS
// More than 15 point bits: the field starts at bit 27, so bits 27..31 (5 bits) hold values up to 31.
// Otherwise: the field starts at bit 28, so bits 28..31 (4 bits) hold values up to 15.
// LSH_N_POINT_BITS also sets O2_MAXTRACKLEN (1<<LSH_N_POINT_BITS) in the limit parameters below.
mas01mc@502 92 #if LSH_N_POINT_BITS > 15
mas01mc@500 93 #define LSH_POINT_BITS_FIELD_POSITION 27 // LSB bit position in 32-bit int for n_point_bits
mas01mc@500 94 #define LSH_POINT_BITS_FIELD_MAX 31 // maximum value possible given LSH_POINT_BITS_FIELD_POSITION
mas01mc@502 95 #else
mas01mc@502 96 #define LSH_POINT_BITS_FIELD_POSITION 28 // LSB bit position in 32-bit int for n_point_bits
mas01mc@502 97 #define LSH_POINT_BITS_FIELD_MAX 15 // maximum value possible given LSH_POINT_BITS_FIELD_POSITION
mas01mc@502 98 #endif
mas01mc@502 99
mas01mc@502 100
mas01mc@500 101
mas01mc@500 102
// LIMIT PARAMETERS
#define O2_DEFAULT_DATASIZE (1355U) // in MB
#define O2_DEFAULT_NTRACKS (20000U)
#define O2_DEFAULT_DATADIM (9U)
// The expansion is a parenthesised type, i.e. a cast prefix. It cannot be handed to
// sizeof directly: sizeof(O2_REALTYPE) would expand to the ill-formed sizeof((double)).
#define O2_REALTYPE (double)
#define O2_MAXFILES (1000000U)
#define O2_MAXFILESTR (256U)
#define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR)
#define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned))
#define O2_HEADERSIZE (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS (1000U)
#define O2_MAXDIM (20000U)
#define O2_MAXNN (1000000U)
#define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence
#define O2_MAXTRACKS (1000000U) // maximum number of tracks
// NOTE(review): the literal 1 is a signed int, so LSH_N_POINT_BITS == 31 would overflow this shift.
#define O2_MAXTRACKLEN (1<<LSH_N_POINT_BITS) // maximum shingles in a track
// Bytes needed for an O2_MAXSEQLEN x O2_MAXSEQLEN matrix of O2_REALTYPE values.
// sizeof is applied to the cast expression "O2_REALTYPE 0" so that the parenthesised
// type macro yields a valid operand.
#define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE 0)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB
#define O2_DISTANCE_TOLERANCE (1e-6)
#define O2_SERIAL_MAX_TRACKBATCH (1000000)
#define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
#define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally
#define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS )
mas01cr@0 125
// Flags
#define O2_FLAG_L2NORM (0x1U)
#define O2_FLAG_MINMAX (0x2U)
#define O2_FLAG_POWER (0x4U)
#define O2_FLAG_TIMES (0x20U)
#define O2_FLAG_LARGE_ADB (0x40U)
// Yields "on" when x is non-zero, "off" otherwise. The argument is parenthesised so that
// a conditional expression passed as x is evaluated as a whole before the test.
#define DISPLAY_FLAG(x) ((x)?"on":"off")

// Query types
// O2_POINT_QUERY is the initial value of queryType in O2_AUDIODB_INITIALIZERS.
#define O2_POINT_QUERY (0x4U)
#define O2_SEQUENCE_QUERY (0x8U)
#define O2_TRACK_QUERY (0x10U)
#define O2_N_SEQUENCE_QUERY (0x20U)
#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
mas01mc@248 140
mas01cr@0 141 // Error Codes
// Also the initial value of query_from_key_index in O2_AUDIODB_INITIALIZERS.
mas01cr@0 142 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 143
mas01cr@0 144 // Macros
// True when the string `command` (taken from the scope where the macro is expanded) equals a.
mas01cr@0 145 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 146
// Round x up / down to a multiple of 2^w by masking off the low w bits.
// Both arguments are parenthesised so that an operator expression passed as w
// (e.g. a conditional) is evaluated before the shift.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))

// Round x up / down to a multiple of getpagesize(); the masking assumes the
// page size is a power of two. getpagesize() is called on every expansion.
#define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1))
#define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))

// Substitute the empty string for a null pointer; x is evaluated twice when non-null.
#define ENSURE_STRING(x) ((x) ? (x) : "")
mas01cr@166 154
// mmap `length` bytes of the file descriptor dbfid at file offset `start` (MAP_SHARED,
// readable, and writable only when forWrite is true), then store the mapping in var cast to `type`.
// If mmap fails, error() is called with the stringified name of var; should error() return,
// var is still assigned the failed mapping value.
// Expects dbfid, forWrite and error() to be visible where the macro is expanded.
mas01cr@239 155 #define CHECKED_MMAP(type, var, start, length) \
mas01cr@239 156 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
mas01cr@239 157 if(tmp == (void *) -1) { \
mas01cr@239 158 error("mmap error for db table", #var, "mmap"); \
mas01cr@239 159 } \
mas01cr@239 160 var = (type) tmp; \
mas01cr@239 161 }
mas01cr@239 162
// read() `count` bytes from fd into buf with a single call; `count` is evaluated once.
// Calls error() when read() returns -1, and also when fewer bytes than requested arrive
// (a short read is treated as an error, not retried).
// Expects error() to be visible where the macro is expanded.
mas01cr@370 163 #define CHECKED_READ(fd, buf, count) \
mas01cr@370 164 { size_t tmpcount = count; \
mas01cr@370 165 ssize_t tmp = read(fd, buf, tmpcount); \
mas01cr@370 166 if(tmp == -1) { \
mas01cr@370 167 error("read error", "", "read"); \
mas01cr@370 168 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 169 error("short read", ""); \
mas01cr@370 170 } \
mas01cr@370 171 }
mas01cr@370 172
// write() `count` bytes from buf to fd with a single call; `count` is evaluated once.
// Calls error() when write() returns -1, and also when fewer bytes than requested were
// written (a short write is treated as an error, not retried).
// Expects error() to be visible where the macro is expanded.
mas01cr@370 173 #define CHECKED_WRITE(fd, buf, count) \
mas01cr@370 174 { size_t tmpcount = count; \
mas01cr@370 175 ssize_t tmp = write(fd, buf, tmpcount); \
mas01cr@370 176 if(tmp == -1) { \
mas01cr@370 177 error("write error", "", "write"); \
mas01cr@370 178 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 179 error("short write", ""); \
mas01cr@370 180 } \
mas01cr@370 181 }
mas01cr@370 182
// Print a printf-style message to stderr and flush it when verbosity is greater than vv.
// Written as an if/else with an empty true-branch so that an `else` following the macro
// at the call site pairs with the caller's own `if`, not with the one inside the macro.
// vv is parenthesised so an operator expression can be passed as the level.
// Expects `verbosity` to be visible where the macro is expanded.
#define VERB_LOG(vv, ...) \
  if(!(verbosity > (vv))) { \
  } else { \
    fprintf(stderr, __VA_ARGS__); \
    fflush(stderr); \
  }
mas01cr@0 188
// We will only use this in a 32-bit address space
// So map the off_t down to 32-bits first
//
// Copy the string STR into slot number dbH->numFiles of TABLE, where each slot is
// O2_FILETABLE_ENTRY_SIZE bytes wide. At most O2_FILETABLE_ENTRY_SIZE-1 bytes are
// written, so an over-long string is truncated instead of running into the next slot;
// shorter strings are zero-padded by strncpy up to that limit.
// NOTE(review): the last byte of the slot is never written, so a truncated string is
// terminated only if that byte is already zero. Confirm the table is zero-filled.
// Expects `dbH` to be visible where the macro is expanded. The expansion still ends
// with its own semicolon, as it always has.
#define INSERT_FILETABLE_STRING(TABLE, STR) \
  strncpy((TABLE) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, (STR), O2_FILETABLE_ENTRY_SIZE - 1);
mas01mc@324 193
// Delete the object (or array) PTR points to and reset PTR to null.
// The two statements are wrapped in braces so they stay together when the macro is the
// body of an unbraced if/for/while; a trailing semicolon at the call site remains harmless.
#define SAFE_DELETE(PTR) { delete (PTR); (PTR)=0; }
#define SAFE_DELETE_ARRAY(PTR) { delete[] (PTR); (PTR)=0; }
mas01mc@324 196
// Globals declared here and defined in some other translation unit.
// NOTE(review): the SERVER_ prefix suggests state used in server mode; confirm at the definition site.
mas01mc@308 197 extern LSH* SERVER_LSH_INDEX_SINGLETON;
mas01mc@324 198 extern char* SERVER_ADB_ROOT;
mas01mc@324 199 extern char* SERVER_ADB_FEATURE_ROOT;
mas01mc@308 200
// Database table header; O2_HEADERSIZE is sizeof this struct and audioDB::dbH points to one.
// The off_t members make the struct's size depend on the platform's off_t width.
// NOTE(review): magic/version presumably hold O2_MAGIC / O2_FORMAT_VERSION; confirm where the header is written.
mas01cr@210 201 typedef struct dbTableHeader {
mas01cr@114 202 uint32_t magic;
mas01cr@114 203 uint32_t version;
mas01cr@114 204 uint32_t numFiles;
mas01cr@114 205 uint32_t dim;
mas01cr@114 206 uint32_t flags;
mas01cr@210 207 uint32_t headerSize;
mas01cr@196 208 off_t length;
mas01cr@196 209 off_t fileTableOffset;
mas01cr@196 210 off_t trackTableOffset;
mas01cr@196 211 off_t dataOffset;
mas01cr@196 212 off_t l2normTableOffset;
mas01cr@196 213 off_t timesTableOffset;
mas01cr@196 214 off_t powerTableOffset;
mas01cr@196 215 off_t dbSize;
mas01cr@0 216 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 217
// Plain status record; a cppstatusptr is taken by audioDB::status(const char*, cppstatusptr)
// and by one of the audioDB constructors.
mas01ik@355 218 typedef struct {
mas01ik@355 219
mas01ik@355 220 unsigned numFiles;
mas01ik@355 221 unsigned dim;
mas01ik@355 222 unsigned length;
mas01ik@355 223 unsigned dudCount;
mas01ik@355 224 unsigned nullCount;
mas01ik@355 225 unsigned flags;
mas01ik@355 226
mas01ik@355 227
mas01ik@355 228 } cppstatus, *cppstatusptr;
mas01mc@292 229
// Element type of audioDB::exact_evaluation_queue, a priority_queue ordered by std::less<PointPair>.
// The constructor and operator< are only declared here.
// NOTE(review): a, b, c presumably initialise trackID, qpos, spos in that order; confirm in the definition.
mas01mc@292 230 class PointPair{
mas01mc@292 231 public:
mas01mc@292 232 Uns32T trackID;
mas01mc@292 233 Uns32T qpos;
mas01mc@292 234 Uns32T spos;
mas01mc@292 235 PointPair(Uns32T a, Uns32T b, Uns32T c);
mas01mc@292 236 };
mas01mc@292 237
// Ordering used by std::less<PointPair> in the exact-evaluation priority queue.
mas01mc@292 238 bool operator<(const PointPair& a, const PointPair& b);
mas01cr@0 239
// Declaration of the audioDB class; all member functions are defined elsewhere.
// Initial member values are listed in O2_AUDIODB_INITIALIZERS at the end of this header.
// Several macros above (CHECKED_MMAP, VERB_LOG, O2_ACTION, INSERT_FILETABLE_STRING) expand to
// code that names members of this class (dbfid, forWrite, verbosity, command, dbH, error).
mas01mc@308 240 class audioDB{
mas01cr@0 241 private:
mas01cr@0 242 gengetopt_args_info args_info;
mas01cr@0 243 unsigned dim;
mas01cr@0 244 const char *dbName;
mas01cr@0 245 const char *inFile;
mas01cr@0 246 const char *hostport;
mas01cr@0 247 const char *key;
mas01mc@18 248 const char* trackFileName;
mas01cr@239 249 std::ifstream *trackFile;
mas01cr@0 250 const char *command;
mas01cr@131 251 const char *output;
mas01cr@0 252 const char *timesFileName;
mas01cr@239 253 std::ifstream *timesFile;
mas01cr@193 254 const char *powerFileName;
mas01cr@239 255 std::ifstream *powerFile;
mas01mc@324 256 const char* adb_root;
mas01mc@324 257 const char* adb_feature_root;
mas01mc@324 258
mas01cr@193 259 int powerfd;
mas01cr@0 260 int dbfid;
mas01mc@292 261 int lshfid;
mas01cr@196 262 bool forWrite;
mas01cr@0 263 int infid;
mas01cr@0 264 char* db;
mas01cr@0 265 char* indata;
mas01cr@0 266 struct stat statbuf;
mas01cr@0 267 dbTableHeaderPtr dbH;
mas01cr@284 268
mas01cr@284 269 gsl_rng *rng;
mas01cr@0 270
mas01mc@324 271 char* fileTable;
mas01mc@18 272 unsigned* trackTable;
mas01mc@324 273 off_t* trackOffsetTable;
mas01cr@0 274 double* dataBuf;
mas01cr@0 275 double* inBuf;
mas01cr@0 276 double* l2normTable;
mas01cr@196 277 double* timesTable;
mas01cr@193 278 double* powerTable;
mas01cr@0 279
mas01mc@324 280 char* featureFileNameTable;
mas01mc@324 281 char* timesFileNameTable;
mas01mc@324 282 char* powerFileNameTable;
mas01mc@324 283
mas01cr@196 284 size_t fileTableLength;
mas01cr@196 285 size_t trackTableLength;
mas01cr@196 286 off_t dataBufLength;
mas01cr@196 287 size_t timesTableLength;
mas01cr@196 288 size_t powerTableLength;
mas01cr@196 289 size_t l2normTableLength;
mas01cr@196 290
mas01cr@0 291 // Flags and parameters
mas01cr@0 292 unsigned verbosity; // how much do we want to know?
mas01cr@256 293
mas01cr@280 294 unsigned nsamples;
mas01cr@280 295
mas01cr@256 296 //off_t size; // given size (for creation)
mas01cr@256 297 unsigned datasize; // size in MB
mas01cr@256 298 unsigned ntracks;
mas01cr@256 299 unsigned datadim;
mas01cr@256 300
mas01cr@0 301 unsigned queryType; // point queries default
mas01cr@0 302 unsigned pointNN; // how many point NNs ?
mas01mc@18 303 unsigned trackNN; // how many track NNs ?
mas01cr@0 304 unsigned sequenceLength;
mas01cr@0 305 unsigned sequenceHop;
mas01cr@239 306 bool normalizedDistance;
mas01mc@292 307 bool no_unit_norming;
mas01cr@0 308 unsigned queryPoint;
mas01cr@0 309 unsigned usingQueryPoint;
mas01cr@0 310 unsigned usingTimes;
mas01cr@193 311 unsigned usingPower;
mas01cr@0 312 unsigned isClient;
mas01cr@0 313 unsigned isServer;
mas01cr@0 314 unsigned port;
mas01cr@0 315 double timesTol;
mas01mc@17 316 double radius;
mas01mc@292 317 bool query_from_key;
mas01mc@292 318 Uns32T query_from_key_index;
mas01cr@193 319 bool use_absolute_threshold;
mas01cr@193 320 double absolute_threshold;
mas01cr@193 321 bool use_relative_threshold;
mas01cr@193 322 double relative_threshold;
mas01mc@334 323
mas01mc@292 324 ReporterBase* reporter; // track/point reporter
mas01mc@292 325 priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >* exact_evaluation_queue;
mas01mc@414 326 set<Uns32T> * allowed_keys; // search restrict list by key
mas01mc@292 327
mas01cr@0 328 // Timers
mas01cr@0 329 struct timeval tv1;
mas01cr@0 330 struct timeval tv2;
mas01mc@334 331
mas01mc@334 332 // LISZT parameters
mas01mc@334 333 unsigned lisztOffset;
mas01mc@334 334 unsigned lisztLength;
mas01mc@334 335
mas01ik@355 336 //for lib / API
mas01cr@370 337 int apierrortemp;
mas01ik@355 338 unsigned UseApiError;
mas01ik@355 339
mas01cr@0 340 // private methods
// error() is the hook the CHECKED_* macros call; in those calls the third argument names the failing system call.
mas01cr@32 341 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@193 342 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@193 343 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@193 344 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@193 345
mas01ik@355 346
mas01cr@239 347 void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD);
mas01cr@239 348 void delete_arrays(int track, unsigned int numVectors, double **D, double **DD);
mas01mc@324 349 void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p);
mas01cr@239 350 void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
mas01mc@292 351 void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex);
mas01cr@239 352 void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
mas01mc@292 353 void query_loop(const char* dbName, Uns32T queryIndex);
mas01mc@292 354 void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors);
mas01mc@292 355 double dot_product_points(double* q, double* p, Uns32T L);
mas01cr@284 356 void initRNG();
mas01cr@196 357 void initDBHeader(const char *dbName);
mas01mc@324 358 void initInputFile(const char *inFile, bool loadData = true);
mas01mc@292 359 void initTables(const char* dbName, const char* inFile = 0);
mas01mc@292 360 void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
mas01cr@0 361 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 362 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@239 363 void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
mas01cr@193 364 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 365 unsigned getKeyPos(char* key);
mas01mc@324 366 void prefix_name(char** const name, const char* prefix);
mas01mc@324 367
mas01cr@0 368 public:
// Constructors all take argc/argv; the overloads differ in the response or status object and/or the int* error slot they are given.
mas01cr@370 369 audioDB(const unsigned argc, const char *argv[]);
mas01cr@370 370 audioDB(const unsigned argc, const char *argv[], adb__queryResponse *adbQueryResponse);
mas01cr@370 371 audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
mas01cr@370 372 audioDB(const unsigned argc, const char *argv[], adb__lisztResponse *adbLisztResponse);
mas01cr@370 373 audioDB(const unsigned argc, const char *argv[], int * apierror);
mas01cr@370 374 audioDB(const unsigned argc, const char *argv[], cppstatusptr stat, int * apierror);
mas01cr@370 375 audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror);
mas01ik@355 376
mas01mc@334 377
mas01cr@97 378 void cleanup();
mas01cr@0 379 ~audioDB();
mas01cr@370 380 int processArgs(const unsigned argc, const char* argv[]);
mas01cr@30 381 void get_lock(int fd, bool exclusive);
mas01cr@30 382 void release_lock(int fd);
mas01cr@0 383 void create(const char* dbName);
mas01cr@251 384 bool enough_per_file_space_free();
mas01cr@196 385 bool enough_data_space_free(off_t size);
mas01cr@196 386 void insert_data_vectors(off_t offset, void *buffer, size_t size);
mas01cr@0 387 void insert(const char* dbName, const char* inFile);
mas01cr@0 388 void batchinsert(const char* dbName, const char* inFile);
mas01mc@324 389 void batchinsert_large_adb(const char* dbName, const char* inFile);
mas01cr@133 390 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 391 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01ik@355 392 void status(const char* dbName, cppstatusptr status);
mas01ik@355 393
mas01cr@284 394 unsigned random_track(unsigned *propTable, unsigned total);
mas01cr@280 395 void sample(const char *dbName);
mas01cr@0 396 void l2norm(const char* dbName);
mas01cr@193 397 void power_flag(const char *dbName);
mas01cr@193 398 bool powers_acceptable(double p1, double p2);
mas01cr@0 399 void dump(const char* dbName);
mas01mc@334 400 void liszt(const char* dbName, unsigned offset, unsigned numLines, adb__lisztResponse* adbLisztResponse=0);
mas01cr@0 401
// NOTE: the data members below follow the public: label above, so they are publicly accessible.
mas01mc@292 402 // LSH indexing parameters and data structures
mas01mc@292 403 LSH* lsh;
mas01mc@292 404 bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false)
mas01mc@292 405 bool lsh_use_u_functions;
mas01mc@292 406 bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH
mas01mc@308 407 bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident
mas01mc@292 408 double lsh_param_w; // Width of LSH hash-function bins
mas01mc@292 409 Uns32T lsh_param_k; // Number of independent hash functions
mas01mc@292 410 Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables
mas01mc@292 411 Uns32T lsh_param_N; // Number of rows per hash table
mas01mc@292 412 Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
mas01mc@292 413 Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row
mas01mc@324 414 Uns32T lsh_n_point_bits; // How many bits to use to encode point ID within a track
mas01mc@324 415
mas01mc@292 416
mas01mc@292 417 // LSH vector<> containers for one in-core copy of a set of feature vectors
mas01mc@292 418 vector<float>::iterator vi; // feature vector iterator
mas01mc@292 419 vector<vector<float> > *vv; // one-track's worth data
mas01mc@292 420
mas01mc@292 421 // LSH indexing and retrieval methods
mas01mc@292 422 void index_index_db(const char* dbName);
mas01mc@292 423 void index_initialize(double**,double**,double**,double**,unsigned int*);
mas01mc@292 424 void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@292 425 int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
mas01mc@292 426 Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
mas01mc@292 427 void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen);
mas01mc@292 428 int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp);
mas01mc@292 429 int index_query_loop(const char* dbName, Uns32T queryIndex);
mas01mc@292 430 vector<vector<float> >* index_initialize_shingles(Uns32T sz);
mas01mc@292 431 int index_init_query(const char* dbName);
mas01mc@292 432 int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
mas01mc@292 433 char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength);
mas01mc@414 434 static void index_add_point(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method
mas01mc@324 435 static Uns32T index_to_trackID(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackID
mas01mc@324 436 static Uns32T index_to_trackPos(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackPos (spos)
mas01mc@324 437 static Uns32T index_from_trackInfo(Uns32T trackID, Uns32T pntID, Uns32T nPntBits); // Convert audioDB trackID and trackPos to an lsh point index
// (sic) "evalutation" is the spelling of the declared name.
mas01mc@292 438 void initialize_exact_evalutation_queue();
mas01mc@292 439 void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos);
mas01mc@308 440 LSH* index_allocate(char* indexName, bool load_hashTables);
mas01mc@324 441 void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@414 442 void initialize_allowed_keys(std::ifstream*); // implementation of restrict lists using STL "set" class
mas01mc@414 443 int is_in_allowed_keys(Uns32T trackID); // test method for allowed_keys used during search
mas01mc@324 444
mas01mc@292 445 // Web Services
mas01cr@0 446 void startServer();
mas01ik@355 447
mas01mc@308 448 void ws_status(const char*dbName, char* hostport);
mas01mc@308 449 void ws_query(const char*dbName, const char *featureFileName, const char* hostport);
mas01mc@328 450 void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport);
mas01mc@334 451 void ws_liszt(const char* dbName, char* hostport);
mas01mc@334 452
mas01cr@0 453 };
mas01mc@17 454
// Member-initializer list for audioDB, written in member declaration order.
// Every pointer, scalar and flag member of the class is given a defined starting
// value, including hostport, inBuf and lsh_param_w. Members of class or struct type
// (args_info, statbuf, tv1, tv2, vi) are not listed.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  powerFileName(0), \
  powerFile(0), \
  adb_root(0), \
  adb_feature_root(0), \
  powerfd(0), \
  dbfid(0), \
  lshfid(0), \
  forWrite(false), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  rng(0), \
  fileTable(0), \
  trackTable(0), \
  trackOffsetTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  timesTable(0), \
  powerTable(0), \
  featureFileNameTable(0), \
  timesFileNameTable(0), \
  powerFileNameTable(0), \
  fileTableLength(0), \
  trackTableLength(0), \
  dataBufLength(0), \
  timesTableLength(0), \
  powerTableLength(0), \
  l2normTableLength(0), \
  verbosity(1), \
  nsamples(2000), \
  datasize(O2_DEFAULT_DATASIZE), \
  ntracks(O2_DEFAULT_NTRACKS), \
  datadim(O2_DEFAULT_DATADIM), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  normalizedDistance(true), \
  no_unit_norming(false), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  usingPower(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0), \
  query_from_key(false), \
  query_from_key_index(O2_ERR_KEYNOTFOUND), \
  use_absolute_threshold(false), \
  absolute_threshold(0.0), \
  use_relative_threshold(false), \
  relative_threshold(0.0), \
  reporter(0), \
  exact_evaluation_queue(0), \
  allowed_keys(0), \
  lisztOffset(0), \
  lisztLength(0), \
  apierrortemp(0), \
  UseApiError(0), \
  lsh(0), \
  lsh_in_core(false), \
  lsh_use_u_functions(false), \
  lsh_exact(false), \
  WS_load_index(false), \
  lsh_param_w(0.0), \
  lsh_param_k(0), \
  lsh_param_m(0), \
  lsh_param_N(0), \
  lsh_param_b(0), \
  lsh_param_ncols(0), \
  lsh_n_point_bits(0), \
  vv(0)
mas01mc@292 541 #endif