annotate audioDB.h @ 395:bc7a821004bb api-inversion

Invert audioDB::status / audiodb_status(). To do that without breaking abstractions, we actually need a new field in the status structure, storing the size of the data region. Previously, this was computed in the audioDB::status request from the database header, but I'm assuming that "user" code doesn't have access to such internals. While we're at it, name some intermediate values in audioDB::status() so that I don't get confused. Here's the thing, though: we need to make sure that the adb_t * that we have from audiodb_open() or audiodb_create() is propagated all the way through into the C++ routines that implement library functions -- in particular those which actually write to the database; otherwise we won't have a consistent view in memory of the header on-disk (as the adb header that will have been written to disk won't be the same as the one in memory). We can do that, by altering the "API" audioDB constructors to take the adb_t * argument, and setting the adb field in the audioDB object that we've already introduced to that. But now we need to be careful a couple of times: if we have one, then audioDB::initTables() mustn't stomp on it; also, if we're only constructing an audioDB instance to fulfil an API request, we mustn't audiodb_close() the one we have when we destroy the audioDB object, because the adb_t * is the one we have passed in and are going to reuse in later calls to the API. The good news is that we can be careful in just these ways with minimal code. The really good news is that once the inversion is complete, all of this horribleness will automatically go away (as there will be no code which constructs audioDB objects to fulfil API functions). Hooray! It's almost like it was all planned this way.
author mas01cr
date Tue, 25 Nov 2008 16:41:01 +0000
parents fd9b65e5ca95
children a8a5f2ca5380
rev   line source
mas01mc@292 1 #ifndef __AUDIODB_H_
mas01mc@292 2 #define __AUDIODB_H_
mas01mc@292 3
mas01cr@0 4 #include <stdio.h>
mas01cr@0 5 #include <stdlib.h>
mas01cr@0 6 #include <sys/types.h>
mas01cr@0 7 #include <sys/stat.h>
mas01cr@0 8 #include <sys/mman.h>
mas01cr@0 9 #include <fcntl.h>
mas01cr@0 10 #include <string.h>
mas01cr@0 11 #include <iostream>
mas01cr@0 12 #include <fstream>
mas01cr@302 13 #include <set>
mas01cr@302 14 #include <string>
mas01cr@0 15 #include <math.h>
mas01cr@0 16 #include <sys/time.h>
mas01cr@0 17 #include <assert.h>
mas01cr@62 18 #include <float.h>
mas01cr@104 19 #include <signal.h>
mas01cr@280 20 #include <gsl/gsl_rng.h>
mas01cr@0 21
mas01mc@292 22 // includes for LSH indexing
mas01mc@292 23 #include "ReporterBase.h"
mas01mc@292 24 #include "lshlib.h"
mas01mc@292 25
mas01cr@0 26 // includes for web services
mas01cr@0 27 #include "soapH.h"
mas01cr@0 28 #include "cmdline.h"
mas01cr@0 29
mas01cr@0 30 #define MAXSTR 512 // presumably a general string-length limit; its users are not visible in this header
mas01cr@0 31
mas01cr@0 32 // Database PRIMARY commands (upper-case option strings; presumably one of them selects the action to run)
mas01cr@0 33 #define COM_CREATE "--NEW"
mas01cr@0 34 #define COM_INSERT "--INSERT"
mas01cr@0 35 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 36 #define COM_QUERY "--QUERY"
mas01cr@0 37 #define COM_STATUS "--STATUS"
mas01cr@0 38 #define COM_L2NORM "--L2NORM"
mas01cr@193 39 #define COM_POWER "--POWER"
mas01cr@0 40 #define COM_DUMP "--DUMP"
mas01cr@0 41 #define COM_SERVER "--SERVER"
mas01mc@292 42 #define COM_INDEX "--INDEX"
mas01cr@280 43 #define COM_SAMPLE "--SAMPLE"
mas01mc@334 44 #define COM_LISZT "--LISZT"
mas01cr@0 45
mas01cr@0 46 // Parameters (lower-case option strings; presumably qualifiers of a primary command)
mas01cr@0 47 #define COM_CLIENT "--client"
mas01cr@0 48 #define COM_DATABASE "--database"
mas01cr@0 49 #define COM_QTYPE "--qtype"
mas01cr@0 50 #define COM_SEQLEN "--sequencelength"
mas01cr@0 51 #define COM_SEQHOP "--sequencehop"
mas01cr@0 52 #define COM_POINTNN "--pointnn"
mas01mc@307 53 #define COM_RADIUS "--radius"
mas01mc@18 54 #define COM_TRACKNN "--resultlength" // note: the macro says TRACKNN but the option text is --resultlength
mas01cr@0 55 #define COM_QPOINT "--qpoint"
mas01cr@0 56 #define COM_FEATURES "--features"
mas01cr@0 57 #define COM_QUERYKEY "--key"
mas01cr@0 58 #define COM_KEYLIST "--keyList"
mas01cr@0 59 #define COM_TIMES "--times"
mas01cr@193 60 #define COM_QUERYPOWER "--power"
mas01cr@193 61 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@193 62 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01mc@310 63 #define COM_EXHAUSTIVE "--exhaustive"
mas01mc@310 64 #define COM_LSH_EXACT "--lsh_exact"
mas01cr@0 65
mas01mc@314 66 // LSH returns nearest neighbours with probability P < 1, so we also want
mas01mc@314 67 // to return exact points above this boundary.
mas01mc@314 68 // Because we work in radius^2 units, the square root of this number
mas01mc@314 69 // (sqrt(9) = 3) is the multiplier on the radius.
mas01mc@314 70
mas01mc@314 71 #define O2_LSH_EXACT_MULT 9
mas01mc@314 72
mas01cr@108 73 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24) // "O2DB" packed into one integer, first character in the least-significant byte
mas01cr@108 74 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) // "o2db", packed the same way
mas01cr@210 75 #define O2_FORMAT_VERSION (4U)
mas01cr@0 76
mas01cr@0 77 #define O2_DEFAULT_POINTNN (10U) // initial value of audioDB::pointNN
mas01mc@18 78 #define O2_DEFAULT_TRACKNN (10U) // initial value of audioDB::trackNN
mas01cr@0 79
mas01mc@248 80 //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
mas01mc@7 81 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size (decimal: 2*10^9 bytes, which still fits a 32-bit signed int)
mas01cr@0 82
mas01mc@295 83 // Number of bits used for the point ID when (trackID,pointID) is packed into a 32-bit unsigned int
mas01mc@324 84 // This can be controlled at compile time by predefining LSH_N_POINT_BITS
mas01mc@324 85 #define O2_DEFAULT_LSH_N_POINT_BITS 14
mas01mc@324 86
mas01mc@324 87 // Override the default point bit width for large database support
mas01mc@324 88 #ifndef LSH_N_POINT_BITS
mas01mc@324 89 #define LSH_N_POINT_BITS O2_DEFAULT_LSH_N_POINT_BITS
mas01mc@324 90 #endif
mas01mc@295 91
mas01mc@295 92 // LIMIT PARAMETERS: default sizes and upper bounds
mas01cr@256 93 #define O2_DEFAULT_DATASIZE (1355U) // in MB
mas01cr@256 94 #define O2_DEFAULT_NTRACKS (20000U)
mas01cr@256 95 #define O2_DEFAULT_DATADIM (9U)
mas01mc@292 96 #define O2_REALTYPE (double) // expands to a cast, "(double)"; it cannot be used to declare a variable
mas01mc@324 97 #define O2_MAXFILES (1000000U)
mas01cr@0 98 #define O2_MAXFILESTR (256U)
mas01cr@256 99 #define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR) // bytes per file-table entry
mas01cr@256 100 #define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned))
mas01cr@0 101 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 102 #define O2_MEANNUMVECTORS (1000U)
mas01mc@292 103 #define O2_MAXDIM (2000U)
mas01mc@263 104 #define O2_MAXNN (1000000U)
mas01mc@292 105 #define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence
mas01mc@324 106 #define O2_MAXTRACKS (1000000U) // maximum number of tracks
mas01mc@324 107 #define O2_MAXTRACKLEN (1<<(LSH_N_POINT_BITS)) // maximum shingles in a track; LSH_N_POINT_BITS may be overridden with an arbitrary expression, hence the parentheses
mas01mc@292 108 #define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE 0)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB; O2_REALTYPE is a cast, so it is applied to 0 here (sizeof((double)) does not compile)
mas01mc@292 109 #define O2_DISTANCE_TOLERANCE (1e-6)
mas01mc@324 110 #define O2_SERIAL_MAX_TRACKBATCH (1000000)
mas01mc@324 111 #define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
mas01mc@324 112 #define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally
mas01mc@324 113 #define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS ) // 10^9, evaluated in unsigned arithmetic
mas01cr@0 114
mas01cr@0 115 // Flags (bit masks; 0x8 and 0x10 are not assigned here). Presumably stored in dbTableHeader::flags - confirm against the code that sets them
mas01cr@0 116 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 117 #define O2_FLAG_MINMAX (0x2U)
mas01cr@193 118 #define O2_FLAG_POWER (0x4U)
mas01cr@0 119 #define O2_FLAG_TIMES (0x20U)
mas01mc@324 120 #define O2_FLAG_LARGE_ADB (0x40U)
mas01mc@301 121 #define DISPLAY_FLAG(x) (x?"on":"off") // "on" for a non-zero x, otherwise "off"; x is not parenthesised in the expansion
mas01cr@0 122
mas01cr@105 123 // Query types (a separate value space from the flags above, although some values coincide numerically)
mas01cr@105 124 #define O2_POINT_QUERY (0x4U) // initial value of audioDB::queryType
mas01cr@105 125 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 126 #define O2_TRACK_QUERY (0x10U)
mas01mc@248 127 #define O2_N_SEQUENCE_QUERY (0x20U)
mas01mc@263 128 #define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
mas01mc@248 129
mas01cr@0 130 // Error Codes
mas01cr@0 131 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // also the initial value of audioDB::query_from_key_index
mas01cr@0 132
mas01cr@0 133 // Macros
mas01cr@0 134 #define O2_ACTION(a) (strcmp(command,(a))==0) // true when `command` (taken from the expansion scope) equals the string a
mas01cr@0 135
mas01cr@370 136 #define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1)) // round x up to a multiple of 2^w; w is parenthesised so that an expression argument keeps its meaning
mas01cr@108 137 #define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1)) // round x down to a multiple of 2^w
mas01cr@108 138
mas01cr@370 139 #define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1)) // round x up to a multiple of the page size (calls getpagesize() twice)
mas01cr@196 140 #define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1)) // round x down to a multiple of the page size
mas01cr@196 141
mas01cr@166 142 #define ENSURE_STRING(x) ((x) ? (x) : "") // maps a null pointer to the empty string; evaluates x twice
mas01cr@166 143
mas01cr@239 144 #define CHECKED_MMAP(type, var, start, length) /* Map `length` bytes of dbfid starting at file offset `start` (MAP_SHARED; writable only when forWrite is set) and store the address, cast to `type`, in `var`. A failed mmap is reported through error(). forWrite, dbfid and error() are taken from the expansion scope; in this header they are private members of audioDB. */ \
mas01cr@239 145 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
mas01cr@239 146 if(tmp == (void *) -1) { /* mmap's failure return, i.e. MAP_FAILED */ \
mas01cr@239 147 error("mmap error for db table", #var, "mmap"); \
mas01cr@239 148 } \
mas01cr@239 149 var = (type) tmp; \
mas01cr@239 150 }
mas01cr@239 151
mas01cr@370 152 #define CHECKED_READ(fd, buf, count) /* read() exactly `count` bytes from fd into buf. A -1 return is reported as "read error" and any other byte count as "short read", both through error() from the expansion scope. There is no retry loop. */ \
mas01cr@370 153 { size_t tmpcount = count; /* evaluate `count` only once */ \
mas01cr@370 154 ssize_t tmp = read(fd, buf, tmpcount); \
mas01cr@370 155 if(tmp == -1) { \
mas01cr@370 156 error("read error", "", "read"); \
mas01cr@370 157 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 158 error("short read", ""); \
mas01cr@370 159 } \
mas01cr@370 160 }
mas01cr@370 161
mas01cr@370 162 #define CHECKED_WRITE(fd, buf, count) /* write() exactly `count` bytes from buf to fd. A -1 return is reported as "write error" and any other byte count as "short write", both through error() from the expansion scope. There is no retry loop. */ \
mas01cr@370 163 { size_t tmpcount = count; /* evaluate `count` only once */ \
mas01cr@370 164 ssize_t tmp = write(fd, buf, tmpcount); \
mas01cr@370 165 if(tmp == -1) { \
mas01cr@370 166 error("write error", "", "write"); \
mas01cr@370 167 } else if((size_t) tmp != tmpcount) { \
mas01cr@370 168 error("short write", ""); \
mas01cr@370 169 } \
mas01cr@370 170 }
mas01cr@370 171
mas01cr@239 172 #define VERB_LOG(vv, ...) /* printf-style message to stderr, flushed immediately, emitted only when `verbosity` (from the expansion scope) is greater than vv. NOTE(review): this expands to a bare `if`, so an `else` written directly after a call would attach to it. */ \
mas01cr@239 173 if(verbosity > vv) { \
mas01cr@239 174 fprintf(stderr, __VA_ARGS__); \
mas01cr@239 175 fflush(stderr); \
mas01cr@239 176 }
mas01cr@0 177
mas01mc@324 178 // Copy STR into entry number dbH->numFiles of TABLE (entries are O2_FILETABLE_ENTRY_SIZE bytes wide; the offset is computed in 32-bit unsigned arithmetic because numFiles is uint32_t).
mas01mc@324 179 // At most O2_FILETABLE_ENTRY_SIZE-1 characters are copied, the rest of the entry is zero-filled and the last byte is always NUL, so an over-long string is truncated rather than overrunning into the next entry. STR is evaluated once.
mas01mc@324 180 #define INSERT_FILETABLE_STRING(TABLE, STR) \
mas01mc@324 181 { char *o2_ifs_entry = (TABLE) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE; strncpy(o2_ifs_entry, (STR), O2_FILETABLE_ENTRY_SIZE-1); o2_ifs_entry[O2_FILETABLE_ENTRY_SIZE-1] = '\0'; }
mas01mc@324 182
mas01mc@324 183 #define SAFE_DELETE(PTR) { delete (PTR); (PTR)=0; } // delete and null the pointer; the braces keep both statements together under an unbraced if/for
mas01mc@324 184 #define SAFE_DELETE_ARRAY(PTR) { delete[] (PTR); (PTR)=0; } // array form of SAFE_DELETE
mas01mc@324 185
mas01mc@308 186 extern LSH* SERVER_LSH_INDEX_SINGLETON; // declaration only; the definition lives in another translation unit. Presumably the index kept resident by the server (cf. audioDB::WS_load_index) - confirm
mas01mc@324 187 extern char* SERVER_ADB_ROOT; // declaration only; presumably the server-side counterpart of audioDB::adb_root - confirm
mas01mc@324 188 extern char* SERVER_ADB_FEATURE_ROOT; // declaration only; presumably the server-side counterpart of audioDB::adb_feature_root - confirm
mas01mc@308 189
mas01cr@210 190 typedef struct dbTableHeader { // database header record; O2_HEADERSIZE is sizeof this struct, and the changeset description speaks of it being written to disk
mas01cr@114 191 uint32_t magic; // presumably O2_MAGIC (O2_OLD_MAGIC for older files) - confirm in the open/create code
mas01cr@114 192 uint32_t version; // presumably checked against O2_FORMAT_VERSION - confirm
mas01cr@114 193 uint32_t numFiles; // INSERT_FILETABLE_STRING writes to file-table entry number numFiles
mas01cr@114 194 uint32_t dim;
mas01cr@114 195 uint32_t flags; // presumably a combination of the O2_FLAG_* bits - confirm
mas01cr@210 196 uint32_t headerSize;
mas01cr@196 197 off_t length; // NOTE(review): the width of off_t depends on the build configuration, and with it the layout and size of this struct
mas01cr@196 198 off_t fileTableOffset; // the *Offset fields presumably locate the corresponding table within the database file - confirm
mas01cr@196 199 off_t trackTableOffset;
mas01cr@196 200 off_t dataOffset;
mas01cr@196 201 off_t l2normTableOffset;
mas01cr@196 202 off_t timesTableOffset;
mas01cr@196 203 off_t powerTableOffset;
mas01cr@196 204 off_t dbSize;
mas01cr@0 205 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 206
mas01mc@292 207 class PointPair{ // element type of audioDB::exact_evaluation_queue: a track ID plus two positions
mas01mc@292 208 public:
mas01mc@292 209 Uns32T trackID;
mas01mc@292 210 Uns32T qpos; // presumably the position in the query - confirm
mas01mc@292 211 Uns32T spos; // presumably the position in the database track (index_to_trackPos mentions "trackPos (spos)") - confirm
mas01mc@292 212 PointPair(Uns32T a, Uns32T b, Uns32T c); // defined elsewhere; the mapping of a, b, c onto the fields is not visible here
mas01mc@292 213 };
mas01mc@292 214
mas01mc@292 215 bool operator<(const PointPair& a, const PointPair& b); // ordering used by std::less<PointPair> in the exact-evaluation priority queue; defined elsewhere
mas01cr@0 216
mas01mc@308 217 class audioDB{ // Bundles the command-line arguments, database handles and tables, and the operations on them; most members receive their defaults from O2_AUDIODB_INITIALIZERS at the end of this header
mas01cr@0 218 private:
mas01cr@0 219 gengetopt_args_info args_info; // parsed command line (type presumably declared in cmdline.h)
mas01cr@0 220 unsigned dim;
mas01cr@0 221 const char *dbName;
mas01cr@0 222 const char *inFile;
mas01cr@0 223 const char *hostport;
mas01cr@0 224 const char *key;
mas01mc@18 225 const char* trackFileName;
mas01cr@239 226 std::ifstream *trackFile;
mas01cr@0 227 const char *command; // O2_ACTION(a) compares this string with a
mas01cr@131 228 const char *output;
mas01cr@0 229 const char *timesFileName;
mas01cr@239 230 std::ifstream *timesFile;
mas01cr@193 231 const char *powerFileName;
mas01cr@239 232 std::ifstream *powerFile;
mas01mc@324 233 const char* adb_root;
mas01mc@324 234 const char* adb_feature_root;
mas01mc@324 235
mas01cr@193 236 int powerfd;
mas01cr@0 237 int dbfid; // file descriptor that CHECKED_MMAP maps
mas01mc@292 238 int lshfid;
mas01cr@196 239 bool forWrite; // when true, CHECKED_MMAP adds PROT_WRITE to the mapping
mas01cr@0 240 int infid;
mas01cr@0 241 char* db;
mas01cr@0 242 char* indata;
mas01cr@0 243 struct stat statbuf;
mas01cr@0 244 dbTableHeaderPtr dbH; // database header; INSERT_FILETABLE_STRING reads dbH->numFiles
mas01cr@392 245 struct adb *adb; // library handle. Per the changeset description: when it was passed in through an "API" constructor, initTables() must not overwrite it and the destructor must not audiodb_close() it
mas01cr@284 246
mas01cr@284 247 gsl_rng *rng;
mas01cr@0 248
mas01mc@324 249 char* fileTable;
mas01mc@18 250 unsigned* trackTable;
mas01mc@324 251 off_t* trackOffsetTable;
mas01cr@0 252 double* dataBuf;
mas01cr@0 253 double* inBuf;
mas01cr@0 254 double* l2normTable;
mas01cr@196 255 double* timesTable;
mas01cr@193 256 double* powerTable;
mas01cr@0 257
mas01mc@324 258 char* featureFileNameTable;
mas01mc@324 259 char* timesFileNameTable;
mas01mc@324 260 char* powerFileNameTable;
mas01mc@324 261
mas01cr@196 262 size_t fileTableLength;
mas01cr@196 263 size_t trackTableLength;
mas01cr@196 264 off_t dataBufLength;
mas01cr@196 265 size_t timesTableLength;
mas01cr@196 266 size_t powerTableLength;
mas01cr@196 267 size_t l2normTableLength;
mas01cr@196 268
mas01cr@0 269 // Flags and parameters
mas01cr@0 270 unsigned verbosity; // how much do we want to know? (VERB_LOG(vv, ...) prints when verbosity > vv)
mas01cr@256 271
mas01cr@280 272 unsigned nsamples;
mas01cr@280 273
mas01cr@256 274 //off_t size; // given size (for creation)
mas01cr@256 275 unsigned datasize; // size in MB
mas01cr@256 276 unsigned ntracks;
mas01cr@256 277 unsigned datadim;
mas01cr@256 278
mas01cr@0 279 unsigned queryType; // point queries default
mas01cr@0 280 unsigned pointNN; // how many point NNs ?
mas01mc@18 281 unsigned trackNN; // how many track NNs ?
mas01cr@0 282 unsigned sequenceLength;
mas01cr@0 283 unsigned sequenceHop;
mas01cr@239 284 bool normalizedDistance;
mas01mc@292 285 bool no_unit_norming;
mas01cr@0 286 unsigned queryPoint;
mas01cr@0 287 unsigned usingQueryPoint;
mas01cr@0 288 unsigned usingTimes;
mas01cr@193 289 unsigned usingPower;
mas01cr@0 290 unsigned isClient;
mas01cr@0 291 unsigned isServer;
mas01cr@0 292 unsigned port;
mas01cr@0 293 double timesTol;
mas01mc@17 294 double radius;
mas01mc@292 295 bool query_from_key;
mas01mc@292 296 Uns32T query_from_key_index; // starts out as O2_ERR_KEYNOTFOUND
mas01cr@193 297 bool use_absolute_threshold;
mas01cr@193 298 double absolute_threshold;
mas01cr@193 299 bool use_relative_threshold;
mas01cr@193 300 double relative_threshold;
mas01mc@334 301
mas01mc@292 302 ReporterBase* reporter; // track/point reporter
mas01mc@292 303 priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >* exact_evaluation_queue; // NOTE(review): priority_queue is unqualified and <queue> is not included here; this presumably relies on one of the project headers - confirm
mas01mc@292 304
mas01cr@0 305 // Timers
mas01cr@0 306 struct timeval tv1;
mas01cr@0 307 struct timeval tv2;
mas01mc@334 308
mas01mc@334 309 // LISZT parameters
mas01mc@334 310 unsigned lisztOffset;
mas01mc@334 311 unsigned lisztLength;
mas01mc@334 312
mas01ik@355 313 //for lib / API
mas01cr@370 314 int apierrortemp;
mas01ik@355 315 unsigned UseApiError;
mas01ik@355 316
mas01cr@0 317 // private methods
mas01cr@32 318 void error(const char* a, const char* b = "", const char *sysFunc = 0); // error reporter called by the CHECKED_* macros; the third argument is the name of the failing system call where there is one
mas01cr@193 319 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@193 320 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@193 321 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@193 322
mas01ik@355 323
mas01cr@239 324 void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD);
mas01cr@239 325 void delete_arrays(int track, unsigned int numVectors, double **D, double **DD);
mas01mc@324 326 void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p);
mas01cr@239 327 void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
mas01mc@292 328 void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex);
mas01cr@239 329 void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
mas01mc@292 330 void query_loop(const char* dbName, Uns32T queryIndex);
mas01mc@292 331 void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors);
mas01mc@292 332 double dot_product_points(double* q, double* p, Uns32T L);
mas01cr@284 333 void initRNG();
mas01cr@196 334 void initDBHeader(const char *dbName);
mas01mc@324 335 void initInputFile(const char *inFile, bool loadData = true);
mas01mc@292 336 void initTables(const char* dbName, const char* inFile = 0); // per the changeset description, must not overwrite an adb handle that was passed in
mas01mc@292 337 void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
mas01cr@0 338 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 339 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@239 340 void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
mas01cr@193 341 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 342 unsigned getKeyPos(char* key);
mas01mc@324 343 void prefix_name(char** const name, const char* prefix);
mas01mc@324 344
mas01cr@0 345 public:
mas01cr@370 346 audioDB(const unsigned argc, const char *argv[]);
mas01cr@370 347 audioDB(const unsigned argc, const char *argv[], adb__queryResponse *adbQueryResponse);
mas01cr@370 348 audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
mas01cr@370 349 audioDB(const unsigned argc, const char *argv[], adb__lisztResponse *adbLisztResponse);
mas01cr@395 350 audioDB(const unsigned argc, const char *argv[], int * apierror, struct adb *a); // "API" constructor: a is the handle from audiodb_open()/audiodb_create() and is stored in the adb member (see changeset description)
mas01cr@395 351 audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror, struct adb *a); // "API" constructor, query-response variant
mas01ik@355 352
mas01mc@334 353
mas01cr@97 354 void cleanup();
mas01cr@0 355 ~audioDB();
mas01cr@370 356 int processArgs(const unsigned argc, const char* argv[]);
mas01cr@30 357 void get_lock(int fd, bool exclusive);
mas01cr@30 358 void release_lock(int fd);
mas01cr@0 359 void create(const char* dbName);
mas01cr@251 360 bool enough_per_file_space_free();
mas01cr@196 361 bool enough_data_space_free(off_t size);
mas01cr@196 362 void insert_data_vectors(off_t offset, void *buffer, size_t size);
mas01cr@0 363 void insert(const char* dbName, const char* inFile);
mas01cr@0 364 void batchinsert(const char* dbName, const char* inFile);
mas01mc@324 365 void batchinsert_large_adb(const char* dbName, const char* inFile);
mas01cr@133 366 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 367 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01ik@355 368
mas01cr@284 369 unsigned random_track(unsigned *propTable, unsigned total);
mas01cr@280 370 void sample(const char *dbName);
mas01cr@0 371 void l2norm(const char* dbName);
mas01cr@193 372 void power_flag(const char *dbName);
mas01cr@193 373 bool powers_acceptable(double p1, double p2);
mas01cr@0 374 void dump(const char* dbName);
mas01mc@334 375 void liszt(const char* dbName, unsigned offset, unsigned numLines, adb__lisztResponse* adbLisztResponse=0);
mas01cr@0 376
mas01mc@292 377 // LSH indexing parameters and data structures
mas01mc@292 378 LSH* lsh;
mas01mc@292 379 bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false)
mas01mc@292 380 bool lsh_use_u_functions;
mas01mc@292 381 bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH
mas01mc@308 382 bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident
mas01mc@292 383 double lsh_param_w; // Width of LSH hash-function bins
mas01mc@292 384 Uns32T lsh_param_k; // Number of independent hash functions
mas01mc@292 385 Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables
mas01mc@292 386 Uns32T lsh_param_N; // Number of rows per hash table
mas01mc@292 387 Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
mas01mc@292 388 Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row
mas01mc@324 389 Uns32T lsh_n_point_bits; // How many bits to use to encode point ID within a track
mas01mc@324 390
mas01mc@292 391
mas01mc@292 392 // LSH vector<> containers for one in-core copy of a set of feature vectors
mas01mc@292 393 vector<float>::iterator vi; // feature vector iterator
mas01mc@292 394 vector<vector<float> > *vv; // one-track's worth data
mas01mc@292 395
mas01mc@292 396 // LSH indexing and retrieval methods
mas01mc@292 397 void index_index_db(const char* dbName);
mas01mc@292 398 void index_initialize(double**,double**,double**,double**,unsigned int*);
mas01mc@292 399 void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@292 400 int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
mas01mc@292 401 Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
mas01mc@292 402 void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen);
mas01mc@292 403 int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp);
mas01mc@292 404 int index_query_loop(const char* dbName, Uns32T queryIndex);
mas01mc@292 405 vector<vector<float> >* index_initialize_shingles(Uns32T sz);
mas01mc@292 406 int index_init_query(const char* dbName);
mas01mc@292 407 int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
mas01mc@292 408 char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength);
mas01mc@292 409 static void index_add_point_approximate(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method
mas01mc@292 410 static void index_add_point_exact(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method
mas01mc@324 411 static Uns32T index_to_trackID(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackID
mas01mc@324 412 static Uns32T index_to_trackPos(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackPos (spos)
mas01mc@324 413 static Uns32T index_from_trackInfo(Uns32T trackID, Uns32T pntID, Uns32T nPntBits); // Convert audioDB trackID and trackPos to an lsh point index
mas01mc@292 414 void initialize_exact_evalutation_queue(); // NOTE: "evalutation" is misspelled, but the name is part of the interface and is kept for callers
mas01mc@292 415 void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos);
mas01mc@308 416 LSH* index_allocate(char* indexName, bool load_hashTables);
mas01mc@324 417 void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
mas01mc@324 418
mas01mc@292 419 // Web Services
mas01cr@0 420 void startServer();
mas01ik@355 421
mas01mc@308 422 void ws_status(const char*dbName, char* hostport);
mas01mc@308 423 void ws_query(const char*dbName, const char *featureFileName, const char* hostport);
mas01mc@328 424 void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport);
mas01mc@334 425 void ws_liszt(const char* dbName, char* hostport);
mas01mc@334 426
mas01cr@0 427 };
mas01mc@17 428
mas01mc@292 429 #define O2_AUDIODB_INITIALIZERS /* Member-initialiser list giving the audioDB data members their default values, written in declaration order and intended for use after the colon of a constructor. Pointers, descriptors and counters start at 0; tunables start at the defaults shown. args_info, statbuf, tv1, tv2 and vi have no entry, nor do hostport, inBuf and lsh_param_w (marked below). */ \
mas01mc@292 430 dim(0), \
mas01mc@292 431 dbName(0), \
mas01mc@292 432 inFile(0), /* NOTE(review): hostport, declared next in the class, has no entry - confirm the constructors set it */ \
mas01mc@292 433 key(0), \
mas01mc@292 434 trackFileName(0), \
mas01mc@292 435 trackFile(0), \
mas01mc@292 436 command(0), \
mas01mc@292 437 output(0), \
mas01mc@292 438 timesFileName(0), \
mas01mc@292 439 timesFile(0), \
mas01mc@292 440 powerFileName(0), \
mas01mc@292 441 powerFile(0), \
mas01mc@324 442 adb_root(0), \
mas01mc@324 443 adb_feature_root(0), \
mas01mc@324 444 powerfd(0), \
mas01mc@292 445 dbfid(0), \
mas01mc@292 446 lshfid(0), \
mas01mc@292 447 forWrite(false), \
mas01mc@292 448 infid(0), \
mas01mc@292 449 db(0), \
mas01mc@292 450 indata(0), \
mas01mc@292 451 dbH(0), \
mas01cr@392 452 adb(0), \
mas01mc@292 453 rng(0), \
mas01mc@292 454 fileTable(0), \
mas01mc@292 455 trackTable(0), \
mas01mc@292 456 trackOffsetTable(0), \
mas01mc@292 457 dataBuf(0), /* NOTE(review): inBuf, declared next in the class, has no entry */ \
mas01mc@292 458 l2normTable(0), \
mas01mc@292 459 timesTable(0), \
mas01mc@314 460 powerTable(0), \
mas01mc@324 461 featureFileNameTable(0), \
mas01mc@324 462 timesFileNameTable(0), \
mas01mc@324 463 powerFileNameTable(0), \
mas01mc@292 464 fileTableLength(0), \
mas01mc@292 465 trackTableLength(0), \
mas01mc@292 466 dataBufLength(0), \
mas01mc@292 467 timesTableLength(0), \
mas01mc@292 468 powerTableLength(0), \
mas01mc@292 469 l2normTableLength(0), \
mas01mc@292 470 verbosity(1), \
mas01mc@292 471 nsamples(2000), \
mas01mc@292 472 datasize(O2_DEFAULT_DATASIZE), \
mas01mc@292 473 ntracks(O2_DEFAULT_NTRACKS), \
mas01mc@292 474 datadim(O2_DEFAULT_DATADIM), \
mas01mc@292 475 queryType(O2_POINT_QUERY), \
mas01mc@292 476 pointNN(O2_DEFAULT_POINTNN), \
mas01mc@292 477 trackNN(O2_DEFAULT_TRACKNN), \
mas01mc@292 478 sequenceLength(16), \
mas01mc@292 479 sequenceHop(1), \
mas01mc@292 480 normalizedDistance(true), \
mas01mc@292 481 no_unit_norming(false), \
mas01mc@292 482 queryPoint(0), \
mas01mc@292 483 usingQueryPoint(0), \
mas01mc@292 484 usingTimes(0), \
mas01mc@292 485 usingPower(0), \
mas01mc@292 486 isClient(0), \
mas01mc@292 487 isServer(0), \
mas01mc@292 488 port(0), \
mas01mc@292 489 timesTol(0.1), \
mas01mc@292 490 radius(0), \
mas01mc@292 491 query_from_key(false), \
mas01mc@292 492 query_from_key_index(O2_ERR_KEYNOTFOUND), \
mas01mc@292 493 use_absolute_threshold(false), \
mas01mc@292 494 absolute_threshold(0.0), \
mas01mc@292 495 use_relative_threshold(false), \
mas01mc@292 496 relative_threshold(0.0), \
mas01mc@292 497 reporter(0), \
mas01mc@292 498 exact_evaluation_queue(0), \
mas01mc@334 499 lisztOffset(0), \
mas01mc@334 500 lisztLength(0), \
mas01cr@370 501 apierrortemp(0), \
mas01cr@370 502 UseApiError(0), \
mas01mc@292 503 lsh(0), \
mas01mc@292 504 lsh_in_core(false), \
mas01mc@292 505 lsh_use_u_functions(false), \
mas01mc@292 506 lsh_exact(false), \
mas01mc@308 507 WS_load_index(false), /* NOTE(review): lsh_param_w, declared next in the class, has no entry */ \
mas01mc@292 508 lsh_param_k(0), \
mas01mc@292 509 lsh_param_m(0), \
mas01mc@292 510 lsh_param_N(0), \
mas01mc@292 511 lsh_param_b(0), \
mas01mc@292 512 lsh_param_ncols(0), \
mas01mc@324 513 lsh_n_point_bits(0), \
mas01cr@370 514 vv(0)
mas01mc@292 515 #endif