annotate audioDB.h @ 166:ea6936b82915

Defensive treatment of SOAP input. Ensure that things that are assumed to be supplied are actually supplied, so that we will fail gracefully rather than trip over broken assumptions...
author mas01cr
date Tue, 06 Nov 2007 16:29:56 +0000
parents a5d5a55a412d
children cdd441dcc9a8
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
// Size limit for strings.
// NOTE(review): presumably a buffer length — confirm at the use sites.
mas01cr@0 21 #define MAXSTR 512
mas01cr@0 22
mas01cr@0 23 // Database PRIMARY commands
// Option strings naming the top-level action.
// NOTE(review): presumably compared against the active command via
// O2_ACTION — confirm in the code that parses the command line.
mas01cr@0 24 #define COM_CREATE "--NEW"
mas01cr@0 25 #define COM_INSERT "--INSERT"
mas01cr@0 26 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 27 #define COM_QUERY "--QUERY"
mas01cr@0 28 #define COM_STATUS "--STATUS"
mas01cr@0 29 #define COM_L2NORM "--L2NORM"
mas01cr@0 30 #define COM_DUMP "--DUMP"
mas01cr@0 31 #define COM_SERVER "--SERVER"
mas01cr@0 32
mas01cr@0 33 // parameters
// Option strings for per-command parameters.  Two spellings differ from the
// macro names: COM_TRACKNN is "--resultlength" and COM_QUERYKEY is "--key".
mas01cr@0 34 #define COM_CLIENT "--client"
mas01cr@0 35 #define COM_DATABASE "--database"
mas01cr@0 36 #define COM_QTYPE "--qtype"
mas01cr@0 37 #define COM_SEQLEN "--sequencelength"
mas01cr@0 38 #define COM_SEQHOP "--sequencehop"
mas01cr@0 39 #define COM_POINTNN "--pointnn"
mas01mc@18 40 #define COM_TRACKNN "--resultlength"
mas01cr@0 41 #define COM_QPOINT "--qpoint"
mas01cr@0 42 #define COM_FEATURES "--features"
mas01cr@0 43 #define COM_QUERYKEY "--key"
mas01cr@0 44 #define COM_KEYLIST "--keyList"
mas01cr@0 45 #define COM_TIMES "--times"
mas01cr@0 46
// File-format identification.  Each magic value packs four ASCII characters
// into one integer with the first character in the least-significant byte:
// O2_OLD_MAGIC is 'O','2','D','B' and O2_MAGIC is 'o','2','d','b'.
// NOTE(review): presumably stored in dbTableHeader::magic and ::version —
// confirm against the code that reads and writes the header.
mas01cr@108 47 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
mas01cr@108 48 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
mas01cr@108 49 #define O2_FORMAT_VERSION (0U)
mas01cr@0 50
// Defaults and size limits.
// O2_DEFAULT_POINTNN / O2_DEFAULT_TRACKNN are the values the
// O2_AUDIODB_INITIALIZERS macro gives to the pointNN / trackNN members.
mas01cr@0 51 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 52 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 53
// Unlike its neighbours this literal has no U suffix, so it is a signed
// constant; 2000000000 still fits below 2^31 - 1.
mas01mc@7 54 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
mas01mc@7 55 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 56
mas01cr@0 57 //#define O2_MAXFILES (1000000)
mas01cr@0 58 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 59 #define O2_MAXFILESTR (256U)
// O2_FILETABLESIZE is an alias for O2_MAXFILESTR; O2_TRACKTABLESIZE and
// O2_HEADERSIZE are sizeof expressions, so their type is size_t.
// NOTE(review): the two *TABLESIZE values look like per-entry sizes rather
// than whole-table sizes — confirm where the tables are laid out.
mas01cr@0 60 #define O2_FILETABLESIZE (O2_MAXFILESTR)
mas01mc@18 61 #define O2_TRACKTABLESIZE (sizeof(unsigned))
mas01cr@0 62 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 63 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 64 #define O2_MAXDIM (1000U)
mas01mc@17 65 #define O2_MAXNN (10000U)
mas01cr@0 66
mas01cr@0 67 // Flags
// Each flag is a single bit.  The flag bits (0x1, 0x2, 0x20) do not overlap
// the query-type bits (0x4, 0x8, 0x10) defined below.
// NOTE(review): presumably these are OR-ed into dbTableHeader::flags — confirm.
mas01cr@0 68 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 69 #define O2_FLAG_MINMAX (0x2U)
mas01cr@0 70 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 71
mas01cr@105 72 // Query types
// O2_POINT_QUERY is the value O2_AUDIODB_INITIALIZERS assigns to queryType.
mas01cr@105 73 #define O2_POINT_QUERY (0x4U)
mas01cr@105 74 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 75 #define O2_TRACK_QUERY (0x10U)
mas01cr@105 76
mas01cr@0 77 // Error Codes
// The hex literal has no suffix; where int is 32 bits it does not fit in int
// and therefore has type unsigned int.
// NOTE(review): presumably the "not found" result of getKeyPos() — confirm.
mas01cr@0 78 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 79
mas01cr@0 80 // Macros
// True when the C string `a` equals `command`.  The macro does not take
// `command` as a parameter: it uses whatever identifier named `command` is
// in scope where the macro is expanded.
mas01cr@0 81 #define O2_ACTION(a) (strcmp(command,a)==0)
mas01cr@0 82
// Round x up (ALIGN_UP) or down (ALIGN_DOWN) to a multiple of 2^w.
//   x: integer value to align.
//   w: log2 of the alignment (e.g. w == 3 aligns to 8).
// Both arguments are fully parenthesized so that expressions such as
// `a | b` can be passed for w, and the add-then-mask order in ALIGN_UP is
// explicit rather than relying on `+` binding tighter than `&`.
// The mask is deliberately built from a plain int `1`: the resulting negative
// int sign-extends when combined with a wider x, keeping the upper bits of x
// intact.  w must therefore stay below the bit width of int minus one.
// Each argument is expanded more than once; avoid side effects.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
mas01cr@108 85
// Yields x when x is non-null and the empty string literal "" otherwise.
// x appears twice in the expansion, so an argument with side effects would
// be evaluated twice.
mas01cr@166 86 #define ENSURE_STRING(x) ((x) ? (x) : "")
mas01cr@166 87
// NOTE(review): a using-directive in a header applies to every file that
// includes it.  The class below relies on it for the unqualified `ifstream`.
mas01cr@0 88 using namespace std;
mas01cr@0 89
// Fixed-layout header record for an audioDB database; O2_HEADERSIZE is
// sizeof(dbTableHeaderT).  Every field is a uint32_t, so the record does not
// change size between 32- and 64-bit builds (see the FIXME below for the
// trade-off this implies).
// NOTE(review): uint32_t needs <stdint.h>, which is not among this header's
// direct includes — presumably it arrives via another header; confirm.
mas01cr@0 90 typedef struct dbTableHeader{
mas01cr@114 91 uint32_t magic; // NOTE(review): presumably O2_MAGIC or O2_OLD_MAGIC — confirm
mas01cr@114 92 uint32_t version; // NOTE(review): presumably O2_FORMAT_VERSION — confirm
mas01cr@114 93 uint32_t numFiles;
mas01cr@114 94 uint32_t dim;
mas01cr@114 95 uint32_t flags; // NOTE(review): presumably a mask of O2_FLAG_* bits — confirm
mas01cr@111 96 // FIXME: these lengths and offsets should be size_t or off_t, but
mas01cr@111 97 // that causes this header (and hence audioDB files) to be
mas01cr@111 98 // unportable between 32 and 64-bit architectures. Making them
mas01cr@114 99 // uint32_t isn't the real answer, as it means we won't be able to
mas01cr@114 100 // scale to really large collections easily but it works around the
mas01cr@128 101 // problem. Expanding to 64 bits will of course need a change in
mas01cr@128 102 // file format version. -- CSR, 2007-10-05
mas01cr@114 103 uint32_t length;
mas01cr@114 104 uint32_t fileTableOffset;
mas01cr@114 105 uint32_t trackTableOffset;
mas01cr@114 106 uint32_t dataOffset;
mas01cr@114 107 uint32_t l2normTableOffset;
mas01cr@114 108 uint32_t timesTableOffset;
mas01cr@128 109 uint32_t dbSize;
mas01cr@0 110 } dbTableHeaderT, *dbTableHeaderPtr;
mas01cr@0 111
mas01cr@0 112
// Declaration of the audioDB class.  It bundles the parsed command-line
// state, file descriptors and raw pointers for the database and input file,
// query parameters, and two timers.  Only declarations live here; the member
// function definitions are in another translation unit.
//
// Construction: three constructors, all taking argc/argv; two additionally
// take a pointer to a SOAP response object (adb__queryResponse or
// adb__statusResponse).  query() and status() likewise accept an optional
// response pointer that defaults to 0.
//
// The data-member order matters: the O2_AUDIODB_INITIALIZERS macro lists its
// initializers in this same order, so keep the two in sync when adding or
// moving members.
mas01cr@0 113 class audioDB{
mas01cr@0 114
mas01cr@0 115 private:
// Command-line state.
// NOTE(review): gengetopt_args_info is presumably declared in cmdline.h — confirm.
mas01cr@0 116 gengetopt_args_info args_info;
mas01cr@0 117 unsigned dim;
mas01cr@0 118 const char *dbName;
mas01cr@0 119 const char *inFile;
mas01cr@0 120 const char *hostport;
mas01cr@0 121 const char *key;
mas01mc@18 122 const char* trackFileName;
mas01mc@18 123 ifstream *trackFile;
// O2_ACTION expands to strcmp(command, ...); inside member functions the
// identifier `command` refers to this member unless a local shadows it.
mas01cr@0 124 const char *command;
mas01cr@131 125 const char *output;
mas01cr@0 126 const char *timesFileName;
mas01cr@0 127 ifstream *timesFile;
mas01cr@0 128
// File descriptors and raw buffers.
// NOTE(review): db / indata are presumably mmap()ed regions (sys/mman.h is
// included) — confirm in the implementation.
mas01cr@0 129 int dbfid;
mas01cr@0 130 int infid;
mas01cr@0 131 char* db;
mas01cr@0 132 char* indata;
mas01cr@0 133 struct stat statbuf;
mas01cr@0 134 dbTableHeaderPtr dbH;
mas01cr@0 135
// Table and working-buffer pointers.
mas01cr@0 136 char *fileTable;
mas01mc@18 137 unsigned* trackTable;
mas01cr@0 138 double* dataBuf;
mas01cr@0 139 double* inBuf;
mas01cr@0 140 double* l2normTable;
mas01cr@0 141 double* qNorm;
mas01cr@0 142 double* sNorm;
mas01cr@0 143 double* timesTable;
mas01cr@0 144
mas01cr@0 145 // Flags and parameters
mas01cr@0 146 unsigned verbosity; // how much do we want to know?
mas01cr@129 147 unsigned size; // given size (for creation)
mas01cr@0 148 unsigned queryType; // point queries default
mas01cr@0 149 unsigned pointNN; // how many point NNs ?
mas01mc@18 150 unsigned trackNN; // how many track NNs ?
mas01cr@0 151 unsigned sequenceLength;
mas01cr@0 152 unsigned sequenceHop;
mas01cr@0 153 unsigned queryPoint;
mas01cr@0 154 unsigned usingQueryPoint;
mas01cr@0 155 unsigned usingTimes;
mas01cr@0 156 unsigned isClient;
mas01cr@0 157 unsigned isServer;
mas01cr@0 158 unsigned port;
mas01cr@0 159 double timesTol;
mas01mc@17 160 double radius;
mas01mc@17 161
mas01cr@0 162 // Timers
mas01cr@0 163 struct timeval tv1;
mas01cr@0 164 struct timeval tv2;
mas01cr@0 165
mas01cr@0 166 // private methods
// error(): b defaults to "" and sysFunc to 0, so callers may pass one, two or
// three arguments.  The four *Query* helpers share one signature, including
// an optional response pointer defaulting to 0.
mas01cr@32 167 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@133 168 void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 169 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 170 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 171 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@0 172
mas01cr@120 173 void initDBHeader(const char *dbName, bool forWrite);
mas01cr@27 174 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 175 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 176 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 177 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
// NOTE(review): takes a non-const char*; presumably returns
// O2_ERR_KEYNOTFOUND when the key is absent — confirm in the definition.
mas01cr@0 178 unsigned getKeyPos(char* key);
mas01cr@0 179 public:
mas01cr@0 180
mas01cr@76 181 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 182 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
mas01cr@133 183 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse);
mas01cr@97 184 void cleanup();
mas01cr@0 185 ~audioDB();
mas01cr@0 186 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 187 void get_lock(int fd, bool exclusive);
mas01cr@30 188 void release_lock(int fd);
mas01cr@0 189 void create(const char* dbName);
mas01cr@0 190 void drop();
mas01cr@0 191 void insert(const char* dbName, const char* inFile);
mas01cr@0 192 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 193 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 194 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
// NOTE(review): ws_status takes `char* hostport` while ws_query takes
// `const char* hostport`; the difference looks unintentional, but changing
// it here requires changing the out-of-view definition too.
mas01cr@0 195 void ws_status(const char*dbName, char* hostport);
mas01mc@18 196 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 197 void l2norm(const char* dbName);
mas01cr@0 198 void dump(const char* dbName);
mas01cr@0 199
mas01cr@0 200 // web services
mas01cr@0 201 void startServer();
mas01cr@0 202
mas01cr@0 203 };
mas01mc@17 204
// Comma-separated list of member initializers for class audioDB.
// NOTE(review): presumably spliced after the ':' of each audioDB constructor
// definition — confirm in the implementation file.
//
// The entries follow the declaration order of the class members.  Non-zero
// defaults: verbosity 1, size O2_DEFAULTDBSIZE, queryType O2_POINT_QUERY,
// pointNN O2_DEFAULT_POINTNN, trackNN O2_DEFAULT_TRACKNN, sequenceLength 16,
// sequenceHop 1, timesTol 0.1.  Everything else listed is set to 0.
//
// NOTE(review): the members hostport, inBuf and sNorm (all pointers) and
// args_info, statbuf, tv1, tv2 are NOT listed here, so this macro leaves
// them uninitialized.  Check that the constructors or first uses set them
// before they are read, or add them here in declaration order.
//
// Do not place `//` comments inside the macro body: a line comment followed
// by the trailing backslash would swallow the next line.
mas01cr@105 205 #define O2_AUDIODB_INITIALIZERS \
mas01cr@105 206 dim(0), \
mas01cr@105 207 dbName(0), \
mas01cr@105 208 inFile(0), \
mas01cr@105 209 key(0), \
mas01cr@105 210 trackFileName(0), \
mas01cr@105 211 trackFile(0), \
mas01cr@105 212 command(0), \
mas01cr@131 213 output(0), \
mas01cr@105 214 timesFileName(0), \
mas01cr@105 215 timesFile(0), \
mas01cr@105 216 dbfid(0), \
mas01cr@105 217 infid(0), \
mas01cr@105 218 db(0), \
mas01cr@105 219 indata(0), \
mas01cr@105 220 dbH(0), \
mas01cr@105 221 fileTable(0), \
mas01cr@105 222 trackTable(0), \
mas01cr@105 223 dataBuf(0), \
mas01cr@105 224 l2normTable(0), \
mas01cr@105 225 qNorm(0), \
mas01cr@105 226 timesTable(0), \
mas01cr@105 227 verbosity(1), \
mas01cr@129 228 size(O2_DEFAULTDBSIZE), \
mas01cr@105 229 queryType(O2_POINT_QUERY), \
mas01cr@105 230 pointNN(O2_DEFAULT_POINTNN), \
mas01cr@105 231 trackNN(O2_DEFAULT_TRACKNN), \
mas01cr@105 232 sequenceLength(16), \
mas01cr@105 233 sequenceHop(1), \
mas01cr@105 234 queryPoint(0), \
mas01cr@105 235 usingQueryPoint(0), \
mas01cr@105 236 usingTimes(0), \
mas01cr@105 237 isClient(0), \
mas01cr@105 238 isServer(0), \
mas01cr@105 239 port(0), \
mas01cr@105 240 timesTol(0.1), \
mas01cr@105 241 radius(0)