annotate audioDB.h @ 142:c74048e9b569 powertable

Beginnings of power table support. Database header change (and hence version). Read in the powertable on init (and write on create).
author mas01cr
date Tue, 30 Oct 2007 14:26:50 +0000
parents a5d5a55a412d
children 6ae986868114
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
mas01cr@0 21 #define MAXSTR 512 // NOTE(review): presumably a string buffer length; not used in the visible part of this header
mas01cr@0 22
mas01cr@0 23 // Database PRIMARY commands (NOTE(review): presumably matched against `command` via O2_ACTION -- confirm)
mas01cr@0 24 #define COM_CREATE "--NEW"
mas01cr@0 25 #define COM_INSERT "--INSERT"
mas01cr@0 26 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 27 #define COM_QUERY "--QUERY"
mas01cr@0 28 #define COM_STATUS "--STATUS"
mas01cr@0 29 #define COM_L2NORM "--L2NORM"
mas01cr@0 30 #define COM_DUMP "--DUMP"
mas01cr@0 31 #define COM_SERVER "--SERVER"
mas01cr@0 32
mas01cr@0 33 // parameters: option strings that accompany a primary command
mas01cr@0 34 #define COM_CLIENT "--client"
mas01cr@0 35 #define COM_DATABASE "--database"
mas01cr@0 36 #define COM_QTYPE "--qtype"
mas01cr@0 37 #define COM_SEQLEN "--sequencelength"
mas01cr@0 38 #define COM_SEQHOP "--sequencehop"
mas01cr@0 39 #define COM_POINTNN "--pointnn"
mas01mc@18 40 #define COM_TRACKNN "--resultlength" // note: the option text is "--resultlength", not "--tracknn"
mas01cr@0 41 #define COM_QPOINT "--qpoint"
mas01cr@0 42 #define COM_FEATURES "--features"
mas01cr@0 43 #define COM_QUERYKEY "--key"
mas01cr@0 44 #define COM_KEYLIST "--keyList" // note: mixed case, unlike the other option strings
mas01cr@0 45 #define COM_TIMES "--times"
mas01cr@0 46
mas01cr@108 47 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24) // characters "O2DB" packed with 'O' in the least-significant byte
mas01cr@108 48 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) // lower-case "o2db", packed the same way
mas01cr@142 49 #define O2_FORMAT_VERSION (1U) // NOTE(review): presumably the value stored in dbTableHeader::version -- confirm
mas01cr@0 50
mas01cr@0 51 #define O2_DEFAULT_POINTNN (10U) // initial value of audioDB::pointNN (see O2_AUDIODB_INITIALIZERS)
mas01mc@18 52 #define O2_DEFAULT_TRACKNN (10U) // initial value of audioDB::trackNN (see O2_AUDIODB_INITIALIZERS)
mas01cr@0 53
mas01mc@7 54 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size (decimal 2e9); initial value of audioDB::size
mas01mc@7 55 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 56
mas01cr@0 57 //#define O2_MAXFILES (1000000)
mas01cr@0 58 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 59 #define O2_MAXFILESTR (256U) // NOTE(review): presumably the maximum length of one file key string -- confirm
mas01cr@0 60 #define O2_FILETABLESIZE (O2_MAXFILESTR) // same value as O2_MAXFILESTR; presumably the size of one file-table entry -- confirm
mas01mc@18 61 #define O2_TRACKTABLESIZE (sizeof(unsigned)) // size of one `unsigned`, the element type of audioDB::trackTable
mas01cr@0 62 #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) // dbTableHeaderT is declared further down; the macro is only expanded at its point of use
mas01cr@0 63 #define O2_MEANNUMVECTORS (1000U) // NOTE(review): presumably an assumed average vector count per file, used for sizing -- confirm
mas01cr@0 64 #define O2_MAXDIM (1000U) // NOTE(review): presumably the upper bound on feature dimension -- confirm
mas01mc@17 65 #define O2_MAXNN (10000U) // NOTE(review): presumably the upper bound on pointNN/trackNN -- confirm
mas01cr@0 66
mas01cr@0 67 // Flags (NOTE(review): presumably bits of dbTableHeader::flags -- confirm)
mas01cr@0 68 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 69 #define O2_FLAG_MINMAX (0x2U)
mas01cr@142 70 #define O2_FLAG_POWER (0x4U) // numerically equal to O2_POINT_QUERY below
mas01cr@0 71 #define O2_FLAG_TIMES (0x20U) // 0x8 and 0x10 are not flag values here; they are O2_SEQUENCE_QUERY and O2_TRACK_QUERY
mas01cr@0 72
mas01cr@105 73 // Query types (O2_POINT_QUERY is the initial value of audioDB::queryType)
mas01cr@105 74 #define O2_POINT_QUERY (0x4U)
mas01cr@105 75 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 76 #define O2_TRACK_QUERY (0x10U)
mas01cr@105 77
mas01cr@0 78 // Error Codes
mas01cr@0 79 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // NOTE(review): presumably the getKeyPos() result for a missing key -- confirm
mas01cr@0 80
mas01cr@0 81 // Macros
mas01cr@0 82 #define O2_ACTION(a) (strcmp(command,a)==0) // true when the `command` variable in scope at the use site equals string a
mas01cr@0 83
mas01cr@108 84 #define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1)) // round x up to a multiple of 2^w; w must be < 31 (int shift); x and w are evaluated more than once
mas01cr@108 85 #define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1)) // round x down to a multiple of 2^w; w must be < 31 (int shift)
mas01cr@108 86
mas01cr@0 87 using namespace std; // NOTE(review): leaks std into every file that includes this header; kept because ifstream is used unqualified in class audioDB
mas01cr@0 88
mas01cr@0 89 typedef struct dbTableHeader{ // database header record; per the FIXME below, its layout is part of the audioDB file format
mas01cr@114 90 uint32_t magic; // NOTE(review): presumably O2_MAGIC (or O2_OLD_MAGIC for older files) -- confirm
mas01cr@114 91 uint32_t version; // NOTE(review): presumably O2_FORMAT_VERSION -- confirm
mas01cr@114 92 uint32_t numFiles;
mas01cr@114 93 uint32_t dim;
mas01cr@114 94 uint32_t flags; // NOTE(review): presumably a combination of O2_FLAG_* bits -- confirm
mas01cr@111 95 // FIXME: these lengths and offsets should be size_t or off_t, but
mas01cr@111 96 // that causes this header (and hence audioDB files) to be
mas01cr@111 97 // unportable between 32 and 64-bit architectures. Making them
mas01cr@114 98 // uint32_t isn't the real answer, as it means we won't be able to
mas01cr@114 99 // scale to really large collections easily but it works around the
mas01cr@128 100 // problem. Expanding to 64 bits will of course need a change in
mas01cr@128 101 // file format version. -- CSR, 2007-10-05
mas01cr@114 102 uint32_t length;
mas01cr@114 103 uint32_t fileTableOffset; // the *Offset fields locate the individual tables; NOTE(review): origin and units are not visible here
mas01cr@114 104 uint32_t trackTableOffset;
mas01cr@114 105 uint32_t dataOffset;
mas01cr@114 106 uint32_t l2normTableOffset;
mas01cr@114 107 uint32_t timesTableOffset;
mas01cr@142 108 uint32_t powerTableOffset; // sits between timesTableOffset and dbSize, so its presence shifts dbSize within the record
mas01cr@128 109 uint32_t dbSize;
mas01cr@0 110 } dbTableHeaderT, *dbTableHeaderPtr; // dbTableHeaderT sizes O2_HEADERSIZE; dbTableHeaderPtr is the type of audioDB::dbH
mas01cr@0 111
mas01cr@0 112
mas01cr@0 113 class audioDB{ // declares the audioDB state and operations; member functions are defined outside this header
mas01cr@0 114
mas01cr@0 115 private: // data members are initialized in declaration order; keep this order in step with O2_AUDIODB_INITIALIZERS
mas01cr@0 116 gengetopt_args_info args_info; // type comes from cmdline.h (gengetopt-generated, judging by the name)
mas01cr@0 117 unsigned dim;
mas01cr@0 118 const char *dbName;
mas01cr@0 119 const char *inFile;
mas01cr@0 120 const char *hostport;
mas01cr@0 121 const char *key;
mas01mc@18 122 const char* trackFileName;
mas01mc@18 123 ifstream *trackFile;
mas01cr@0 124 const char *command; // O2_ACTION() compares against a variable of this name
mas01cr@131 125 const char *output;
mas01cr@0 126 const char *timesFileName;
mas01cr@0 127 ifstream *timesFile;
mas01cr@0 128
mas01cr@0 129 int dbfid; // NOTE(review): presumably the database file descriptor -- confirm
mas01cr@0 130 int infid; // NOTE(review): presumably the input (feature) file descriptor -- confirm
mas01cr@0 131 char* db; // NOTE(review): presumably the mapped database image (sys/mman.h is included) -- confirm
mas01cr@0 132 char* indata;
mas01cr@0 133 struct stat statbuf;
mas01cr@0 134 dbTableHeaderPtr dbH; // pointer to the database header record
mas01cr@0 135
mas01cr@0 136 char *fileTable;
mas01mc@18 137 unsigned* trackTable;
mas01cr@0 138 double* dataBuf;
mas01cr@0 139 double* inBuf;
mas01cr@0 140 double* l2normTable;
mas01cr@0 141 double* qNorm;
mas01cr@0 142 double* sNorm;
mas01cr@0 143 double* timesTable;
mas01cr@142 144 double* powerTable; // NOTE(review): presumably located via dbTableHeader::powerTableOffset -- confirm
mas01cr@0 145
mas01cr@0 146 // Flags and parameters
mas01cr@0 147 unsigned verbosity; // how much do we want to know?
mas01cr@129 148 unsigned size; // given size (for creation)
mas01cr@0 149 unsigned queryType; // point queries default
mas01cr@0 150 unsigned pointNN; // how many point NNs ?
mas01mc@18 151 unsigned trackNN; // how many track NNs ?
mas01cr@0 152 unsigned sequenceLength;
mas01cr@0 153 unsigned sequenceHop;
mas01cr@0 154 unsigned queryPoint;
mas01cr@0 155 unsigned usingQueryPoint;
mas01cr@0 156 unsigned usingTimes;
mas01cr@0 157 unsigned isClient;
mas01cr@0 158 unsigned isServer;
mas01cr@0 159 unsigned port;
mas01cr@0 160 double timesTol;
mas01mc@17 161 double radius;
mas01mc@17 162
mas01cr@0 163 // Timers
mas01cr@0 164 struct timeval tv1;
mas01cr@0 165 struct timeval tv2;
mas01cr@0 166
mas01cr@0 167 // private methods
mas01cr@32 168 void error(const char* a, const char* b = "", const char *sysFunc = 0); // b defaults to "", sysFunc to null
mas01cr@133 169 void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); // query variants: the response pointer defaults to null
mas01cr@133 170 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 171 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 172 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@0 173
mas01cr@120 174 void initDBHeader(const char *dbName, bool forWrite);
mas01cr@27 175 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 176 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 177 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 178 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@0 179 unsigned getKeyPos(char* key); // NOTE(review): presumably yields O2_ERR_KEYNOTFOUND when the key is absent -- confirm
mas01cr@0 180 public:
mas01cr@0 181
mas01cr@76 182 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 183 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse); // overload taking a query response object
mas01cr@133 184 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse); // overload taking a status response object
mas01cr@97 185 void cleanup();
mas01cr@0 186 ~audioDB();
mas01cr@0 187 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 188 void get_lock(int fd, bool exclusive);
mas01cr@30 189 void release_lock(int fd);
mas01cr@0 190 void create(const char* dbName); // the operations below presumably correspond to the COM_* primary commands -- confirm
mas01cr@0 191 void drop();
mas01cr@0 192 void insert(const char* dbName, const char* inFile);
mas01cr@0 193 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 194 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 195 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@0 196 void ws_status(const char*dbName, char* hostport);
mas01mc@18 197 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 198 void l2norm(const char* dbName);
mas01cr@0 199 void dump(const char* dbName);
mas01cr@0 200
mas01cr@0 201 // web services
mas01cr@0 202 void startServer();
mas01cr@0 203
mas01cr@0 204 };
mas01mc@17 205
mas01cr@105 206 #define O2_AUDIODB_INITIALIZERS /* constructor member-initializer list for audioDB: listed in member declaration order; every pointer member starts null (including hostport, inBuf, sNorm, powerTable) */ \
mas01cr@105 207 dim(0), \
mas01cr@105 208 dbName(0), \
mas01cr@105 209 inFile(0), hostport(0), \
mas01cr@105 210 key(0), \
mas01cr@105 211 trackFileName(0), \
mas01cr@105 212 trackFile(0), \
mas01cr@105 213 command(0), \
mas01cr@131 214 output(0), \
mas01cr@105 215 timesFileName(0), \
mas01cr@105 216 timesFile(0), \
mas01cr@105 217 dbfid(0), \
mas01cr@105 218 infid(0), \
mas01cr@105 219 db(0), \
mas01cr@105 220 indata(0), \
mas01cr@105 221 dbH(0), \
mas01cr@105 222 fileTable(0), \
mas01cr@105 223 trackTable(0), \
mas01cr@105 224 dataBuf(0), inBuf(0), \
mas01cr@105 225 l2normTable(0), \
mas01cr@105 226 qNorm(0), sNorm(0), \
mas01cr@105 227 timesTable(0), powerTable(0), \
mas01cr@105 228 verbosity(1), \
mas01cr@129 229 size(O2_DEFAULTDBSIZE), \
mas01cr@105 230 queryType(O2_POINT_QUERY), \
mas01cr@105 231 pointNN(O2_DEFAULT_POINTNN), \
mas01cr@105 232 trackNN(O2_DEFAULT_TRACKNN), \
mas01cr@105 233 sequenceLength(16), \
mas01cr@105 234 sequenceHop(1), \
mas01cr@105 235 queryPoint(0), \
mas01cr@105 236 usingQueryPoint(0), \
mas01cr@105 237 usingTimes(0), \
mas01cr@105 238 isClient(0), \
mas01cr@105 239 isServer(0), \
mas01cr@105 240 port(0), \
mas01cr@105 241 timesTol(0.1), \
mas01cr@105 242 radius(0)