comparison audioDB.h @ 196:8c81cacf5aab

Merge -r228:254 from the no-big-mmap branch. Although the last log message from that branch only mentioned working create and status (-N and -S), it turned out that everything needed for dump and search to work on huge DBs had already been done correctly too. Additionally:
* bump the DB format version;
* use CHECKED_MMAP() for the powerTable;
* move the powerTable above the timesTable, so that all the code which computes the length of the data buffer by assuming that the timesTable is the next thing on disk still works.
author mas01cr
date Fri, 23 Nov 2007 11:08:15 +0000
parents f9d16137e704
children 72a037f2b1e4
comparison
equal deleted inserted replaced
194:0e75deb7d4d1 196:8c81cacf5aab
48 #define COM_RELATIVE_THRESH "--relative-threshold" 48 #define COM_RELATIVE_THRESH "--relative-threshold"
49 #define COM_ABSOLUTE_THRESH "--absolute-threshold" 49 #define COM_ABSOLUTE_THRESH "--absolute-threshold"
50 50
51 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24) 51 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
52 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) 52 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
53 #define O2_FORMAT_VERSION (1U) 53 #define O2_FORMAT_VERSION (2U)
54 54
55 #define O2_DEFAULT_POINTNN (10U) 55 #define O2_DEFAULT_POINTNN (10U)
56 #define O2_DEFAULT_TRACKNN (10U) 56 #define O2_DEFAULT_TRACKNN (10U)
57 57
58 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size 58 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
86 #define O2_ACTION(a) (strcmp(command,a)==0) 86 #define O2_ACTION(a) (strcmp(command,a)==0)
87 87
88 #define ALIGN_UP(x,w) ((x) + ((1<<w)-1) & ~((1<<w)-1)) 88 #define ALIGN_UP(x,w) ((x) + ((1<<w)-1) & ~((1<<w)-1))
89 #define ALIGN_DOWN(x,w) ((x) & ~((1<<w)-1)) 89 #define ALIGN_DOWN(x,w) ((x) & ~((1<<w)-1))
90 90
91 #define ALIGN_PAGE_UP(x) ((x) + (getpagesize()-1) & ~(getpagesize()-1))
92 #define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))
93
91 #define ENSURE_STRING(x) ((x) ? (x) : "") 94 #define ENSURE_STRING(x) ((x) ? (x) : "")
92 95
93 using namespace std; 96 using namespace std;
94 97
95 typedef struct dbTableHeader{ 98 typedef struct dbTableHeader{
96 uint32_t magic; 99 uint32_t magic;
97 uint32_t version; 100 uint32_t version;
98 uint32_t numFiles; 101 uint32_t numFiles;
99 uint32_t dim; 102 uint32_t dim;
100 uint32_t flags; 103 uint32_t flags;
101 // FIXME: these lengths and offsets should be size_t or off_t, but 104 off_t length;
102 // that causes this header (and hence audioDB files) to be 105 off_t fileTableOffset;
103 // unportable between 32 and 64-bit architectures. Making them 106 off_t trackTableOffset;
104 // uint32_t isn't the real answer, as it means we won't be able to 107 off_t dataOffset;
105 // scale to really large collections easily but it works around the 108 off_t l2normTableOffset;
106 // problem. Expanding to 64 bits will of course need a change in 109 off_t timesTableOffset;
107 // file format version. -- CSR, 2007-10-05 110 off_t powerTableOffset;
108 uint32_t length; 111 off_t dbSize;
109 uint32_t fileTableOffset;
110 uint32_t trackTableOffset;
111 uint32_t dataOffset;
112 uint32_t l2normTableOffset;
113 uint32_t timesTableOffset;
114 uint32_t powerTableOffset;
115 uint32_t dbSize;
116 } dbTableHeaderT, *dbTableHeaderPtr; 112 } dbTableHeaderT, *dbTableHeaderPtr;
117 113
118 114
119 class audioDB{ 115 class audioDB{
120 116
134 const char *powerFileName; 130 const char *powerFileName;
135 ifstream *powerFile; 131 ifstream *powerFile;
136 int powerfd; 132 int powerfd;
137 133
138 int dbfid; 134 int dbfid;
135 bool forWrite;
139 int infid; 136 int infid;
140 char* db; 137 char* db;
141 char* indata; 138 char* indata;
142 struct stat statbuf; 139 struct stat statbuf;
143 dbTableHeaderPtr dbH; 140 dbTableHeaderPtr dbH;
147 double* dataBuf; 144 double* dataBuf;
148 double* inBuf; 145 double* inBuf;
149 double* l2normTable; 146 double* l2normTable;
150 double* qNorm; 147 double* qNorm;
151 double* sNorm; 148 double* sNorm;
152 double* timesTable; 149 double* timesTable;
153 double* powerTable; 150 double* powerTable;
151
152 size_t fileTableLength;
153 size_t trackTableLength;
154 off_t dataBufLength;
155 size_t timesTableLength;
156 size_t powerTableLength;
157 size_t l2normTableLength;
154 158
155 // Flags and parameters 159 // Flags and parameters
156 unsigned verbosity; // how much do we want to know? 160 unsigned verbosity; // how much do we want to know?
157 unsigned size; // given size (for creation) 161 off_t size; // given size (for creation)
158 unsigned queryType; // point queries default 162 unsigned queryType; // point queries default
159 unsigned pointNN; // how many point NNs ? 163 unsigned pointNN; // how many point NNs ?
160 unsigned trackNN; // how many track NNs ? 164 unsigned trackNN; // how many track NNs ?
161 unsigned sequenceLength; 165 unsigned sequenceLength;
162 unsigned sequenceHop; 166 unsigned sequenceHop;
189 void sequence_average(double *buffer, int length, int seqlen); 193 void sequence_average(double *buffer, int length, int seqlen);
190 194
191 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); 195 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
192 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); 196 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
193 197
194 void initDBHeader(const char *dbName, bool forWrite); 198 void initDBHeader(const char *dbName);
195 void initInputFile(const char *inFile); 199 void initInputFile(const char *inFile);
196 void initTables(const char* dbName, bool forWrite, const char* inFile); 200 void initTables(const char* dbName, const char* inFile);
197 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); 201 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
198 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); 202 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
199 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata); 203 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
200 void insertPowerData(unsigned n, int powerfd, double *powerdata); 204 void insertPowerData(unsigned n, int powerfd, double *powerdata);
201 unsigned getKeyPos(char* key); 205 unsigned getKeyPos(char* key);
209 int processArgs(const unsigned argc, char* const argv[]); 213 int processArgs(const unsigned argc, char* const argv[]);
210 void get_lock(int fd, bool exclusive); 214 void get_lock(int fd, bool exclusive);
211 void release_lock(int fd); 215 void release_lock(int fd);
212 void create(const char* dbName); 216 void create(const char* dbName);
213 void drop(); 217 void drop();
218 bool enough_data_space_free(off_t size);
219 void insert_data_vectors(off_t offset, void *buffer, size_t size);
214 void insert(const char* dbName, const char* inFile); 220 void insert(const char* dbName, const char* inFile);
215 void batchinsert(const char* dbName, const char* inFile); 221 void batchinsert(const char* dbName, const char* inFile);
216 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); 222 void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
217 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0); 223 void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
218 void ws_status(const char*dbName, char* hostport); 224 void ws_status(const char*dbName, char* hostport);
240 timesFile(0), \ 246 timesFile(0), \
241 powerFileName(0), \ 247 powerFileName(0), \
242 powerFile(0), \ 248 powerFile(0), \
243 powerfd(0), \ 249 powerfd(0), \
244 dbfid(0), \ 250 dbfid(0), \
251 forWrite(false), \
245 infid(0), \ 252 infid(0), \
246 db(0), \ 253 db(0), \
247 indata(0), \ 254 indata(0), \
248 dbH(0), \ 255 dbH(0), \
249 fileTable(0), \ 256 fileTable(0), \
250 trackTable(0), \ 257 trackTable(0), \
251 dataBuf(0), \ 258 dataBuf(0), \
252 l2normTable(0), \ 259 l2normTable(0), \
253 qNorm(0), \ 260 qNorm(0), \
254 timesTable(0), \ 261 timesTable(0), \
262 fileTableLength(0), \
263 trackTableLength(0), \
264 dataBufLength(0), \
265 timesTableLength(0), \
266 powerTableLength(0), \
267 l2normTableLength(0), \
255 verbosity(1), \ 268 verbosity(1), \
256 size(O2_DEFAULTDBSIZE), \ 269 size(O2_DEFAULTDBSIZE), \
257 queryType(O2_POINT_QUERY), \ 270 queryType(O2_POINT_QUERY), \
258 pointNN(O2_DEFAULT_POINTNN), \ 271 pointNN(O2_DEFAULT_POINTNN), \
259 trackNN(O2_DEFAULT_TRACKNN), \ 272 trackNN(O2_DEFAULT_TRACKNN), \