annotate audioDB.h @ 263:210b2f661b88

Added new query type "onetoonensequence" for matching each query sequence to the single closest match in the database. Most useful if query is not also in the database.
author mas01mc
date Mon, 09 Jun 2008 19:20:39 +0000
parents 4dcb09f5fe85
children 4ffa05f25a00 3be15407e814
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "cmdline.h"
mas01cr@0 19
// Generic string length limit. Where it is applied is not visible in this
// header.
#define MAXSTR 512

// Database PRIMARY commands: the option strings that select an action.
// NOTE(review): presumably matched against `command` through O2_ACTION --
// confirm in the implementation file.
#define COM_CREATE      "--NEW"
#define COM_INSERT      "--INSERT"
#define COM_BATCHINSERT "--BATCHINSERT"
#define COM_QUERY       "--QUERY"
#define COM_STATUS      "--STATUS"
#define COM_L2NORM      "--L2NORM"
#define COM_POWER       "--POWER"
#define COM_DUMP        "--DUMP"
#define COM_SERVER      "--SERVER"
mas01cr@0 32
// Parameter option strings (the lower-case companions of the primary
// commands).
#define COM_CLIENT          "--client"
#define COM_DATABASE        "--database"
#define COM_QTYPE           "--qtype"
#define COM_SEQLEN          "--sequencelength"
#define COM_SEQHOP          "--sequencehop"
#define COM_POINTNN         "--pointnn"
#define COM_TRACKNN         "--resultlength"
#define COM_QPOINT          "--qpoint"
#define COM_FEATURES        "--features"
#define COM_QUERYKEY        "--key"
#define COM_KEYLIST         "--keyList"
#define COM_TIMES           "--times"
#define COM_QUERYPOWER      "--power"
#define COM_RELATIVE_THRESH "--relative-threshold"
#define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01cr@0 49
// Four-character tags packed into an int, first character in the lowest
// byte: O2_OLD_MAGIC spells "O2DB", O2_MAGIC spells "o2db".
#define O2_OLD_MAGIC ('O' | ('2' << 8) | ('D' << 16) | ('B' << 24))
#define O2_MAGIC     ('o' | ('2' << 8) | ('d' << 16) | ('b' << 24))
// Format revision number paired with the magic value.
#define O2_FORMAT_VERSION (4U)
mas01cr@0 53
// Default neighbour counts (used as the initial pointNN / trackNN values in
// O2_AUDIODB_INITIALIZERS).
#define O2_DEFAULT_POINTNN (10U)
#define O2_DEFAULT_TRACKNN (10U)

//#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size

// Defaults for the creation parameters datasize / ntracks / datadim.
#define O2_DEFAULT_DATASIZE (1355U) // in MB
#define O2_DEFAULT_NTRACKS  (20000U)
#define O2_DEFAULT_DATADIM  (9U)

// Table geometry and upper bounds.
#define O2_MAXFILES              (20000U)
#define O2_MAXFILESTR            (256U)
#define O2_FILETABLE_ENTRY_SIZE  (O2_MAXFILESTR)
#define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned))
#define O2_HEADERSIZE            (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS        (1000U)
#define O2_MAXDIM                (1000U)
#define O2_MAXNN                 (1000000U)
mas01cr@0 72
// Flags: single-bit masks.
// NOTE(review): presumably stored in dbTableHeader::flags -- confirm.
#define O2_FLAG_L2NORM (0x1U)
#define O2_FLAG_MINMAX (0x2U)
#define O2_FLAG_POWER  (0x4U)
#define O2_FLAG_TIMES  (0x20U)

// Query types: distinct single-bit values; O2_POINT_QUERY is the initial
// queryType in O2_AUDIODB_INITIALIZERS.
#define O2_POINT_QUERY                 (0x4U)
#define O2_SEQUENCE_QUERY              (0x8U)
#define O2_TRACK_QUERY                 (0x10U)
#define O2_N_SEQUENCE_QUERY            (0x20U)
#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)


// Error Codes
#define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 89
mas01cr@0 90 // Macros
// True when the string `a` equals the variable `command`, which must be in
// scope at the point of use.
#define O2_ACTION(a) (0 == strcmp(command, (a)))
mas01cr@0 92
// Power-of-two alignment helpers.
//
// ALIGN_UP(x,w)   rounds x up to the next multiple of 2^w.
// ALIGN_DOWN(x,w) rounds x down to a multiple of 2^w.
//
// Every macro argument is parenthesised so that an expression such as
// `a|b` can be passed as w, and the add-then-mask order is spelled out
// instead of relying on `+` binding tighter than `&`.
// The mask is built from the int literal 1, so w must be small enough that
// 1<<w does not overflow int. x is evaluated once.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))

// Same as above with the alignment taken from getpagesize(); this relies on
// the page size being a power of two.
#define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1))
#define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))
mas01cr@196 98
// Substitutes the empty string for a null string pointer. The argument is
// evaluated twice when it is non-null, so it should have no side effects.
#define ENSURE_STRING(x) (!(x) ? "" : (x))
mas01cr@166 100
// CHECKED_MMAP(type, var, start, length)
//
// Maps `length` bytes of the file open on `dbfid`, starting at file offset
// `start`, as a MAP_SHARED mapping and stores the address in `var`, cast to
// `type`. The mapping is readable, and also writable when `forWrite` is true.
// If mmap fails, error() is called with the variable name (stringised) as
// detail; the assignment to `var` still follows if error() returns.
//
// Requires `forWrite`, `dbfid` and `error` to be in scope at the call site.
//
// Wrapped in do { } while(0) so the macro acts as a single statement (safe
// inside an unbraced if/else); call sites must end it with a semicolon.
// The failure test uses MAP_FAILED rather than a hand-written (void *) -1,
// and the temporary has a long name so it cannot capture an identifier used
// in the `start` / `length` arguments.
#define CHECKED_MMAP(type, var, start, length) \
  do { \
    void *checked_mmap_result_ = mmap(0, (length), (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
    if(checked_mmap_result_ == MAP_FAILED) { \
      error("mmap error for db table", #var, "mmap"); \
    } \
    var = (type) checked_mmap_result_; \
  } while(0)
mas01cr@239 108
// VERB_LOG(vv, fmt, ...)
//
// printf-style message to stderr, followed by a flush, emitted only when the
// in-scope variable `verbosity` is strictly greater than `vv`.
//
// Wrapped in do { } while(0) so that the macro is one statement: with the
// previous bare `if`, an `else` written after a VERB_LOG inside an unbraced
// `if` would either fail to compile or attach to the macro's own `if`.
// Call sites must end it with a semicolon.
#define VERB_LOG(vv, ...) \
  do { \
    if(verbosity > (vv)) { \
      fprintf(stderr, __VA_ARGS__); \
      fflush(stderr); \
    } \
  } while(0)
mas01cr@0 114
// Header record of a database file; O2_HEADERSIZE is sizeof this type.
// Six 32-bit fields are followed by eight off_t fields.
// NOTE(review): the off_t fields make the record size depend on how off_t
// is configured for the build -- confirm all users agree.
struct dbTableHeader {
  // 32-bit identification / description fields.
  uint32_t magic;
  uint32_t version;
  uint32_t numFiles;
  uint32_t dim;
  uint32_t flags;
  uint32_t headerSize;
  // File offsets and sizes.
  off_t length;
  off_t fileTableOffset;
  off_t trackTableOffset;
  off_t dataOffset;
  off_t l2normTableOffset;
  off_t timesTableOffset;
  off_t powerTableOffset;
  off_t dbSize;
};
typedef struct dbTableHeader dbTableHeaderT;
typedef struct dbTableHeader *dbTableHeaderPtr;
mas01cr@0 131
mas01cr@239 132 class Reporter;
mas01cr@0 133
mas01cr@0 134 class audioDB{
mas01cr@0 135
mas01cr@0 136 private:
mas01cr@0 137 gengetopt_args_info args_info;
mas01cr@0 138 unsigned dim;
mas01cr@0 139 const char *dbName;
mas01cr@0 140 const char *inFile;
mas01cr@0 141 const char *hostport;
mas01cr@0 142 const char *key;
mas01mc@18 143 const char* trackFileName;
mas01cr@239 144 std::ifstream *trackFile;
mas01cr@0 145 const char *command;
mas01cr@131 146 const char *output;
mas01cr@0 147 const char *timesFileName;
mas01cr@239 148 std::ifstream *timesFile;
mas01cr@193 149 const char *powerFileName;
mas01cr@239 150 std::ifstream *powerFile;
mas01cr@193 151 int powerfd;
mas01cr@0 152
mas01cr@0 153 int dbfid;
mas01cr@196 154 bool forWrite;
mas01cr@0 155 int infid;
mas01cr@0 156 char* db;
mas01cr@0 157 char* indata;
mas01cr@0 158 struct stat statbuf;
mas01cr@0 159 dbTableHeaderPtr dbH;
mas01cr@0 160
mas01cr@0 161 char *fileTable;
mas01mc@18 162 unsigned* trackTable;
mas01cr@0 163 double* dataBuf;
mas01cr@0 164 double* inBuf;
mas01cr@0 165 double* l2normTable;
mas01cr@196 166 double* timesTable;
mas01cr@193 167 double* powerTable;
mas01cr@0 168
mas01cr@196 169 size_t fileTableLength;
mas01cr@196 170 size_t trackTableLength;
mas01cr@196 171 off_t dataBufLength;
mas01cr@196 172 size_t timesTableLength;
mas01cr@196 173 size_t powerTableLength;
mas01cr@196 174 size_t l2normTableLength;
mas01cr@196 175
mas01cr@0 176 // Flags and parameters
mas01cr@0 177 unsigned verbosity; // how much do we want to know?
mas01cr@256 178
mas01cr@256 179 //off_t size; // given size (for creation)
mas01cr@256 180 unsigned datasize; // size in MB
mas01cr@256 181 unsigned ntracks;
mas01cr@256 182 unsigned datadim;
mas01cr@256 183
mas01cr@0 184 unsigned queryType; // point queries default
mas01cr@0 185 unsigned pointNN; // how many point NNs ?
mas01mc@18 186 unsigned trackNN; // how many track NNs ?
mas01cr@0 187 unsigned sequenceLength;
mas01cr@0 188 unsigned sequenceHop;
mas01cr@239 189 bool normalizedDistance;
mas01cr@0 190 unsigned queryPoint;
mas01cr@0 191 unsigned usingQueryPoint;
mas01cr@0 192 unsigned usingTimes;
mas01cr@193 193 unsigned usingPower;
mas01cr@0 194 unsigned isClient;
mas01cr@0 195 unsigned isServer;
mas01cr@0 196 unsigned port;
mas01cr@0 197 double timesTol;
mas01mc@17 198 double radius;
mas01cr@193 199
mas01cr@193 200 bool use_absolute_threshold;
mas01cr@193 201 double absolute_threshold;
mas01cr@193 202 bool use_relative_threshold;
mas01cr@193 203 double relative_threshold;
mas01cr@193 204
mas01mc@17 205
mas01cr@0 206 // Timers
mas01cr@0 207 struct timeval tv1;
mas01cr@0 208 struct timeval tv2;
mas01cr@0 209
mas01cr@0 210 // private methods
mas01cr@32 211 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@193 212 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@193 213 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@193 214 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@193 215
mas01cr@239 216 void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD);
mas01cr@239 217 void delete_arrays(int track, unsigned int numVectors, double **D, double **DD);
mas01cr@239 218 void read_data(int track, double **data_buffer_p, size_t *data_buffer_size_p);
mas01cr@239 219 void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
mas01cr@239 220 void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
mas01cr@240 221 void query_loop(const char* dbName, const char* inFile, Reporter *reporter);
mas01cr@0 222
mas01cr@196 223 void initDBHeader(const char *dbName);
mas01cr@169 224 void initInputFile(const char *inFile);
mas01cr@196 225 void initTables(const char* dbName, const char* inFile);
mas01cr@0 226 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 227 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@239 228 void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
mas01cr@193 229 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 230 unsigned getKeyPos(char* key);
mas01cr@0 231 public:
mas01cr@0 232
mas01cr@76 233 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 234 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
mas01cr@133 235 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse);
mas01cr@97 236 void cleanup();
mas01cr@0 237 ~audioDB();
mas01cr@0 238 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 239 void get_lock(int fd, bool exclusive);
mas01cr@30 240 void release_lock(int fd);
mas01cr@0 241 void create(const char* dbName);
mas01cr@0 242 void drop();
mas01cr@251 243 bool enough_per_file_space_free();
mas01cr@196 244 bool enough_data_space_free(off_t size);
mas01cr@196 245 void insert_data_vectors(off_t offset, void *buffer, size_t size);
mas01cr@0 246 void insert(const char* dbName, const char* inFile);
mas01cr@0 247 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 248 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 249 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@0 250 void ws_status(const char*dbName, char* hostport);
mas01mc@18 251 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 252 void l2norm(const char* dbName);
mas01cr@193 253 void power_flag(const char *dbName);
mas01cr@193 254 bool powers_acceptable(double p1, double p2);
mas01cr@0 255 void dump(const char* dbName);
mas01cr@0 256
mas01cr@0 257 // web services
mas01cr@0 258 void startServer();
mas01cr@0 259
mas01cr@0 260 };
mas01mc@17 261
// Member-initialiser list shared by the audioDB constructors; use it as
//   audioDB::audioDB(...) : O2_AUDIODB_INITIALIZERS { ... }
//
// Entries appear in the same order as the members are declared in the class,
// which is the order in which they are actually initialised.
// Every pointer member is listed, including hostport, inBuf and powerTable,
// so none of them starts out indeterminate.
// The struct members (args_info, statbuf, tv1, tv2) are deliberately not
// listed and keep their default initialisation.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  powerFileName(0), \
  powerFile(0), \
  powerfd(0), \
  dbfid(0), \
  forWrite(false), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  fileTable(0), \
  trackTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  timesTable(0), \
  powerTable(0), \
  fileTableLength(0), \
  trackTableLength(0), \
  dataBufLength(0), \
  timesTableLength(0), \
  powerTableLength(0), \
  l2normTableLength(0), \
  verbosity(1), \
  datasize(O2_DEFAULT_DATASIZE), \
  ntracks(O2_DEFAULT_NTRACKS), \
  datadim(O2_DEFAULT_DATADIM), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  normalizedDistance(true), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  usingPower(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0), \
  use_absolute_threshold(false), \
  absolute_threshold(0.0), \
  use_relative_threshold(false), \
  relative_threshold(0.0)