annotate audioDB.h @ 171:bb934f91d85c powertable

Web services interface to the powertable functionality.
* Implemented as a completely new SOAP method, called sequenceQuery.
* Lightly tested with http://doc.gold.ac.uk/~mas01cr/poweradb.php (lacuna: relative-threshold and absolute-threshold must always be passed, even when they don't make sense).
Several uglinesses in the code, some of which will be resolved once all the various branches are merged into the trunk.
author mas01cr
date Wed, 14 Nov 2007 14:00:53 +0000
parents 9ec0f27e07b9
children
rev   line source
mas01cr@0 1 #include <stdio.h>
mas01cr@0 2 #include <stdlib.h>
mas01cr@0 3 #include <sys/types.h>
mas01cr@0 4 #include <sys/stat.h>
mas01cr@0 5 #include <sys/mman.h>
mas01cr@0 6 #include <fcntl.h>
mas01cr@0 7 #include <string.h>
mas01cr@0 8 #include <iostream>
mas01cr@0 9 #include <fstream>
mas01cr@0 10 #include <math.h>
mas01cr@0 11 #include <sys/time.h>
mas01cr@0 12 #include <assert.h>
mas01cr@62 13 #include <float.h>
mas01cr@104 14 #include <signal.h>
mas01cr@0 15
mas01cr@0 16 // includes for web services
mas01cr@0 17 #include "soapH.h"
mas01cr@0 18 #include "adb.nsmap"
mas01cr@0 19 #include "cmdline.h"
mas01cr@0 20
// NOTE(review): MAXSTR is not referenced in the visible part of this header;
// presumably a general-purpose string buffer size -- confirm in the .cpp.
mas01cr@0 21 #define MAXSTR 512
mas01cr@0 22
mas01cr@0 23 // Database PRIMARY commands
// String spellings of the top-level actions.  O2_ACTION() compares one of
// these against the in-scope `command` string.
mas01cr@0 24 #define COM_CREATE "--NEW"
mas01cr@0 25 #define COM_INSERT "--INSERT"
mas01cr@0 26 #define COM_BATCHINSERT "--BATCHINSERT"
mas01cr@0 27 #define COM_QUERY "--QUERY"
mas01cr@0 28 #define COM_STATUS "--STATUS"
mas01cr@0 29 #define COM_L2NORM "--L2NORM"
mas01cr@146 30 #define COM_POWER "--POWER"
mas01cr@0 31 #define COM_DUMP "--DUMP"
mas01cr@0 32 #define COM_SERVER "--SERVER"
mas01cr@0 33
mas01cr@0 34 // parameters
// String spellings of the secondary options that qualify a primary command.
mas01cr@0 35 #define COM_CLIENT "--client"
mas01cr@0 36 #define COM_DATABASE "--database"
mas01cr@0 37 #define COM_QTYPE "--qtype"
mas01cr@0 38 #define COM_SEQLEN "--sequencelength"
mas01cr@0 39 #define COM_SEQHOP "--sequencehop"
mas01cr@0 40 #define COM_POINTNN "--pointnn"
mas01mc@18 41 #define COM_TRACKNN "--resultlength"
mas01cr@0 42 #define COM_QPOINT "--qpoint"
mas01cr@0 43 #define COM_FEATURES "--features"
mas01cr@0 44 #define COM_QUERYKEY "--key"
mas01cr@0 45 #define COM_KEYLIST "--keyList"
mas01cr@0 46 #define COM_TIMES "--times"
mas01cr@171 47 #define COM_QUERYPOWER "--power"
mas01cr@171 48 #define COM_RELATIVE_THRESH "--relative-threshold"
mas01cr@171 49 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
mas01cr@0 50
// File magic numbers: four ASCII characters packed into one 32-bit value,
// with the first character in the least-significant byte.  O2_OLD_MAGIC is
// the upper-case spelling ("O2DB"); presumably it identifies an earlier
// file format -- confirm against the code that reads the header.
mas01cr@108 51 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
mas01cr@108 52 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
mas01cr@142 53 #define O2_FORMAT_VERSION (1U)
mas01cr@0 54
// Defaults for the pointNN and trackNN members of class audioDB.
mas01cr@0 55 #define O2_DEFAULT_POINTNN (10U)
mas01mc@18 56 #define O2_DEFAULT_TRACKNN (10U)
mas01cr@0 57
// Default for audioDB::size.  NOTE(review): unlike the neighbouring
// constants this literal has no U suffix, so it is a signed int (2e9 fits
// in a 32-bit int, but arithmetic on it is signed).
mas01mc@7 58 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size (2e9 bytes, decimal)
mas01mc@7 59 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
mas01cr@0 60
// Capacity limits and table sizes.  O2_FILETABLESIZE is defined as
// O2_MAXFILESTR, O2_TRACKTABLESIZE as sizeof(unsigned) and O2_HEADERSIZE as
// sizeof(dbTableHeaderT); the first two are presumably per-entry sizes of
// the file and track tables -- TODO confirm.
mas01cr@0 61 //#define O2_MAXFILES (1000000)
mas01cr@0 62 #define O2_MAXFILES (10000U) // 10,000 files
mas01cr@0 63 #define O2_MAXFILESTR (256U)
mas01cr@0 64 #define O2_FILETABLESIZE (O2_MAXFILESTR)
mas01mc@18 65 #define O2_TRACKTABLESIZE (sizeof(unsigned))
mas01cr@0 66 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
mas01cr@0 67 #define O2_MEANNUMVECTORS (1000U)
mas01cr@0 68 #define O2_MAXDIM (1000U)
mas01mc@17 69 #define O2_MAXNN (10000U)
mas01cr@0 70
mas01cr@0 71 // Flags
// Single-bit values; presumably OR-ed into dbTableHeader::flags -- confirm.
mas01cr@0 72 #define O2_FLAG_L2NORM (0x1U)
mas01cr@0 73 #define O2_FLAG_MINMAX (0x2U)
mas01cr@142 74 #define O2_FLAG_POWER (0x4U)
mas01cr@0 75 #define O2_FLAG_TIMES (0x20U)
mas01cr@0 76
mas01cr@105 77 // Query types
// O2_POINT_QUERY is the initial value of audioDB::queryType.
// NOTE(review): O2_POINT_QUERY has the same numeric value (0x4U) as
// O2_FLAG_POWER, so query types and flags must not share one bitfield.
mas01cr@105 78 #define O2_POINT_QUERY (0x4U)
mas01cr@105 79 #define O2_SEQUENCE_QUERY (0x8U)
mas01cr@105 80 #define O2_TRACK_QUERY (0x10U)
mas01cr@105 81
mas01cr@0 82 // Error Codes
// Presumably the sentinel returned by getKeyPos() when a key is absent
// (the literal has unsigned type) -- confirm in the .cpp.
mas01cr@0 83 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
mas01cr@0 84
// Macros

// True when the C string `command` (which must be in scope at the point of
// use) is exactly equal to the string `a`.
#define O2_ACTION(a) (strcmp(command,(a))==0)

// Round x up (ALIGN_UP) or down (ALIGN_DOWN) to a multiple of 2^w.
// Every macro argument is parenthesized so that expression arguments such
// as `1|2` or `c ? 1 : 2` expand correctly, and the addition in ALIGN_UP is
// grouped explicitly rather than relying on `+` binding tighter than `&`.
// The mask is computed as the int expression (1<<(w))-1, so w must be
// smaller than the number of value bits in int.  ALIGN_UP evaluates both of
// its arguments' expansions more than once for w; avoid side effects there.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
mas01cr@108 90
mas01cr@0 91 using namespace std;
mas01cr@0 92
// Header record of an audioDB database; O2_HEADERSIZE is sizeof this
// struct.  Every field is a fixed-width 32-bit unsigned integer.
struct dbTableHeader {
  uint32_t magic;
  uint32_t version;
  uint32_t numFiles;
  uint32_t dim;
  uint32_t flags;

  // FIXME: the length and offset fields below really want to be size_t or
  // off_t, but those types differ between 32- and 64-bit architectures,
  // which would make this header (and therefore audioDB files)
  // unportable.  uint32_t works around that at the cost of not scaling
  // easily to really large collections.  Widening to 64 bits will need a
  // new file format version. -- CSR, 2007-10-05
  uint32_t length;
  uint32_t fileTableOffset;
  uint32_t trackTableOffset;
  uint32_t dataOffset;
  uint32_t l2normTableOffset;
  uint32_t timesTableOffset;
  uint32_t powerTableOffset;
  uint32_t dbSize;
};

// Value and pointer aliases used by the rest of the code.
typedef struct dbTableHeader dbTableHeaderT;
typedef struct dbTableHeader *dbTableHeaderPtr;
mas01cr@0 115
mas01cr@0 116
mas01cr@0 117 class audioDB{
mas01cr@0 118
mas01cr@0 119 private:
mas01cr@0 120 gengetopt_args_info args_info;
mas01cr@0 121 unsigned dim;
mas01cr@0 122 const char *dbName;
mas01cr@0 123 const char *inFile;
mas01cr@0 124 const char *hostport;
mas01cr@0 125 const char *key;
mas01mc@18 126 const char* trackFileName;
mas01mc@18 127 ifstream *trackFile;
mas01cr@0 128 const char *command;
mas01cr@131 129 const char *output;
mas01cr@0 130 const char *timesFileName;
mas01cr@0 131 ifstream *timesFile;
mas01cr@144 132 const char *powerFileName;
mas01cr@145 133 ifstream *powerFile;
mas01cr@144 134 int powerfd;
mas01cr@0 135
mas01cr@0 136 int dbfid;
mas01cr@0 137 int infid;
mas01cr@0 138 char* db;
mas01cr@0 139 char* indata;
mas01cr@0 140 struct stat statbuf;
mas01cr@0 141 dbTableHeaderPtr dbH;
mas01cr@0 142
mas01cr@0 143 char *fileTable;
mas01mc@18 144 unsigned* trackTable;
mas01cr@0 145 double* dataBuf;
mas01cr@0 146 double* inBuf;
mas01cr@0 147 double* l2normTable;
mas01cr@0 148 double* qNorm;
mas01cr@0 149 double* sNorm;
mas01cr@0 150 double* timesTable;
mas01cr@142 151 double* powerTable;
mas01cr@0 152
mas01cr@0 153 // Flags and parameters
mas01cr@0 154 unsigned verbosity; // how much do we want to know?
mas01cr@129 155 unsigned size; // given size (for creation)
mas01cr@0 156 unsigned queryType; // point queries default
mas01cr@0 157 unsigned pointNN; // how many point NNs ?
mas01mc@18 158 unsigned trackNN; // how many track NNs ?
mas01cr@0 159 unsigned sequenceLength;
mas01cr@0 160 unsigned sequenceHop;
mas01cr@0 161 unsigned queryPoint;
mas01cr@0 162 unsigned usingQueryPoint;
mas01cr@0 163 unsigned usingTimes;
mas01cr@144 164 unsigned usingPower;
mas01cr@0 165 unsigned isClient;
mas01cr@0 166 unsigned isServer;
mas01cr@0 167 unsigned port;
mas01cr@0 168 double timesTol;
mas01mc@17 169 double radius;
mas01cr@147 170
mas01cr@147 171 bool use_absolute_threshold;
mas01cr@147 172 double absolute_threshold;
mas01cr@147 173 bool use_relative_threshold;
mas01cr@147 174 double relative_threshold;
mas01cr@147 175
mas01mc@17 176
mas01cr@0 177 // Timers
mas01cr@0 178 struct timeval tv1;
mas01cr@0 179 struct timeval tv2;
mas01cr@0 180
mas01cr@0 181 // private methods
mas01cr@32 182 void error(const char* a, const char* b = "", const char *sysFunc = 0);
mas01cr@133 183 void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 184 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@148 185 void sequence_sum(double *buffer, int length, int seqlen);
mas01cr@157 186 void sequence_sqrt(double *buffer, int length, int seqlen);
mas01cr@157 187 void sequence_average(double *buffer, int length, int seqlen);
mas01cr@157 188
mas01cr@133 189 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 190 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@0 191
mas01cr@120 192 void initDBHeader(const char *dbName, bool forWrite);
mas01cr@27 193 void initTables(const char* dbName, bool forWrite, const char* inFile);
mas01cr@0 194 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
mas01cr@0 195 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
mas01cr@0 196 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
mas01cr@144 197 void insertPowerData(unsigned n, int powerfd, double *powerdata);
mas01cr@0 198 unsigned getKeyPos(char* key);
mas01cr@0 199 public:
mas01cr@0 200
mas01cr@76 201 audioDB(const unsigned argc, char* const argv[]);
mas01cr@133 202 audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
mas01cr@133 203 audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse);
mas01cr@97 204 void cleanup();
mas01cr@0 205 ~audioDB();
mas01cr@0 206 int processArgs(const unsigned argc, char* const argv[]);
mas01cr@30 207 void get_lock(int fd, bool exclusive);
mas01cr@30 208 void release_lock(int fd);
mas01cr@0 209 void create(const char* dbName);
mas01cr@0 210 void drop();
mas01cr@0 211 void insert(const char* dbName, const char* inFile);
mas01cr@0 212 void batchinsert(const char* dbName, const char* inFile);
mas01cr@133 213 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
mas01cr@133 214 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
mas01cr@0 215 void ws_status(const char*dbName, char* hostport);
mas01mc@18 216 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
mas01cr@0 217 void l2norm(const char* dbName);
mas01cr@146 218 void power_flag(const char *dbName);
mas01cr@147 219 bool powers_acceptable(double p1, double p2);
mas01cr@0 220 void dump(const char* dbName);
mas01cr@0 221
mas01cr@0 222 // web services
mas01cr@0 223 void startServer();
mas01cr@0 224
mas01cr@0 225 };
mas01mc@17 226
// Member-initializer list for class audioDB: each entry names an audioDB
// data member, and the entries follow the members' declaration order.
// All pointer members, including hostport, inBuf, sNorm and powerTable,
// start out null so that no pointer is left indeterminate after
// construction.  The aggregate members args_info, statbuf, tv1 and tv2 are
// not listed here.
// (Comments cannot be placed on the continuation lines below: a trailing
// backslash would splice the next line into a // comment.)
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  powerFileName(0), \
  powerFile(0), \
  powerfd(0), \
  dbfid(0), \
  infid(0), \
  db(0), \
  indata(0), \
  dbH(0), \
  fileTable(0), \
  trackTable(0), \
  dataBuf(0), \
  inBuf(0), \
  l2normTable(0), \
  qNorm(0), \
  sNorm(0), \
  timesTable(0), \
  powerTable(0), \
  verbosity(1), \
  size(O2_DEFAULTDBSIZE), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  usingPower(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0), \
  use_absolute_threshold(false), \
  absolute_threshold(0.0), \
  use_relative_threshold(false), \
  relative_threshold(0.0)