mas01cr@239
|
1 #include "audioDB.h"
|
mas01cr@239
|
2
|
mas01cr@251
|
3 bool audioDB::enough_per_file_space_free() {
|
mas01cr@251
|
4 unsigned int fmaxfiles, tmaxfiles;
|
mas01cr@251
|
5 unsigned int maxfiles;
|
mas01cr@251
|
6
|
mas01cr@256
|
7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
|
mas01cr@256
|
8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@251
|
9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@251
|
10 return(dbH->numFiles < maxfiles);
|
mas01cr@251
|
11 }
|
mas01cr@251
|
12
|
mas01cr@239
|
13 bool audioDB::enough_data_space_free(off_t size) {
|
mas01mc@324
|
14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
|
mas01cr@239
|
15 }
|
mas01cr@239
|
16
|
mas01cr@239
|
17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
|
mas01cr@366
|
18 if(lseek(dbfid, dbH->dataOffset + offset, SEEK_SET) == (off_t) -1) {
|
mas01cr@366
|
19 error("error seeking to offset", "", "lseek");
|
mas01cr@366
|
20 }
|
mas01cr@366
|
21 CHECKED_WRITE(dbfid, buffer, size);
|
mas01cr@239
|
22 }
|
mas01cr@239
|
23
|
mas01cr@239
|
24 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
25 forWrite = true;
|
mas01cr@239
|
26 initTables(dbName, inFile);
|
mas01cr@239
|
27
|
mas01mc@324
|
28 if(dbH->flags & O2_FLAG_LARGE_ADB)
|
mas01mc@324
|
29 error("Single-feature inserts not allowed with LARGE audioDB instances");
|
mas01mc@324
|
30
|
mas01cr@239
|
31 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
32 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
33
|
mas01cr@239
|
34 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
35 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
36
|
mas01cr@251
|
37 if(!enough_per_file_space_free()) {
|
mas01cr@251
|
38 error("Insert failed: no more room for metadata", inFile);
|
mas01cr@251
|
39 }
|
mas01cr@251
|
40
|
mas01cr@239
|
41 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
42 error("Insert failed: no more room in database", inFile);
|
mas01cr@239
|
43 }
|
mas01cr@239
|
44
|
mas01cr@239
|
45 if(!key)
|
mas01cr@239
|
46 key=inFile;
|
mas01cr@239
|
47 // Linear scan of filenames check for pre-existing feature
|
mas01cr@239
|
48 unsigned alreadyInserted=0;
|
mas01cr@239
|
49 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@256
|
50 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
|
mas01cr@239
|
51 alreadyInserted=1;
|
mas01cr@239
|
52 break;
|
mas01cr@239
|
53 }
|
mas01cr@239
|
54
|
mas01cr@239
|
55 if(alreadyInserted) {
|
mas01cr@239
|
56 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
|
mas01mc@324
|
57 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
|
mas01cr@239
|
58 return;
|
mas01cr@239
|
59 }
|
mas01cr@239
|
60
|
mas01cr@239
|
61 // Make a track index table of features to file indexes
|
mas01cr@239
|
62 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
63 if(!numVectors) {
|
mas01cr@239
|
64 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
|
mas01cr@239
|
65
|
mas01cr@239
|
66 // CLEAN UP
|
mas01cr@239
|
67 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
68 munmap(db,dbH->dbSize);
|
mas01cr@239
|
69 close(infid);
|
mas01cr@239
|
70 return;
|
mas01cr@239
|
71 }
|
mas01cr@239
|
72
|
mas01mc@324
|
73 INSERT_FILETABLE_STRING(fileTable, key);
|
mas01cr@239
|
74
|
mas01cr@239
|
75 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
76
|
mas01cr@239
|
77 // Check times status and insert times from file
|
mas01cr@239
|
78 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
|
mas01cr@239
|
79 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
80
|
mas01cr@239
|
81 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
82 error("out of space for times", key);
|
mas01cr@239
|
83 }
|
mas01cr@239
|
84
|
mas01cr@239
|
85 if (usingTimes) {
|
mas01cr@239
|
86 insertTimeStamps(numVectors, timesFile, timesdata);
|
mas01cr@239
|
87 }
|
mas01cr@239
|
88
|
mas01cr@239
|
89 double *powerdata = powerTable + indexoffset;
|
mas01cr@239
|
90 insertPowerData(numVectors, powerfd, powerdata);
|
mas01cr@239
|
91
|
mas01cr@239
|
92 // Increment file count
|
mas01cr@239
|
93 dbH->numFiles++;
|
mas01cr@239
|
94
|
mas01cr@239
|
95 // Update Header information
|
mas01cr@239
|
96 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
97
|
mas01cr@239
|
98 // Update track to file index map
|
mas01cr@239
|
99 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
|
mas01cr@239
|
100
|
mas01cr@239
|
101 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
102
|
mas01cr@239
|
103 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
104 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
105 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
106
|
mas01cr@239
|
107 // Report status
|
mas01cr@239
|
108 status(dbName);
|
mas01cr@239
|
109 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
|
mas01cr@239
|
110
|
mas01cr@239
|
111 // Copy the header back to the database
|
mas01cr@239
|
112 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
113
|
mas01cr@239
|
114 // CLEAN UP
|
mas01cr@239
|
115 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
116 close(infid);
|
mas01cr@239
|
117 }
|
mas01cr@239
|
118
|
mas01cr@239
|
119 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
|
mas01cr@239
|
120 assert(usingTimes);
|
mas01cr@239
|
121
|
mas01cr@239
|
122 unsigned numtimes = 0;
|
mas01cr@239
|
123
|
mas01cr@239
|
124 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
|
mas01cr@239
|
125 dbH->flags=dbH->flags|O2_FLAG_TIMES;
|
mas01cr@239
|
126 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
|
mas01cr@239
|
127 error("Timestamp file used with non-timestamped database", timesFileName);
|
mas01cr@239
|
128 }
|
mas01cr@239
|
129
|
mas01cr@239
|
130 if(!timesFile->is_open()) {
|
mas01cr@239
|
131 error("problem opening times file on timestamped database", timesFileName);
|
mas01cr@239
|
132 }
|
mas01cr@239
|
133
|
mas01cr@239
|
134 double timepoint, next;
|
mas01cr@239
|
135 *timesFile >> timepoint;
|
mas01cr@239
|
136 if (timesFile->eof()) {
|
mas01cr@239
|
137 error("no entries in times file", timesFileName);
|
mas01cr@239
|
138 }
|
mas01cr@239
|
139 numtimes++;
|
mas01cr@239
|
140 do {
|
mas01cr@239
|
141 *timesFile >> next;
|
mas01cr@239
|
142 if (timesFile->eof()) {
|
mas01cr@239
|
143 break;
|
mas01cr@239
|
144 }
|
mas01cr@239
|
145 numtimes++;
|
mas01cr@239
|
146 timesdata[0] = timepoint;
|
mas01cr@239
|
147 timepoint = (timesdata[1] = next);
|
mas01cr@239
|
148 timesdata += 2;
|
mas01cr@239
|
149 } while (numtimes < numVectors + 1);
|
mas01cr@239
|
150
|
mas01cr@239
|
151 if (numtimes < numVectors + 1) {
|
mas01cr@239
|
152 error("too few timepoints in times file", timesFileName);
|
mas01cr@239
|
153 }
|
mas01cr@239
|
154
|
mas01cr@239
|
155 *timesFile >> next;
|
mas01cr@239
|
156 if (!timesFile->eof()) {
|
mas01cr@239
|
157 error("too many timepoints in times file", timesFileName);
|
mas01cr@239
|
158 }
|
mas01cr@239
|
159 }
|
mas01cr@239
|
160
|
mas01cr@239
|
161 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
|
mas01mc@324
|
162 if(usingPower){
|
mas01cr@239
|
163 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01cr@239
|
164 error("Cannot insert power data on non-power DB", dbName);
|
mas01cr@239
|
165 }
|
mas01mc@324
|
166
|
mas01cr@239
|
167 int one;
|
mas01cr@239
|
168 unsigned int count;
|
mas01mc@324
|
169
|
mas01cr@239
|
170 count = read(powerfd, &one, sizeof(unsigned int));
|
mas01cr@239
|
171 if (count != sizeof(unsigned int)) {
|
mas01cr@239
|
172 error("powerfd read failed", "int", "read");
|
mas01cr@239
|
173 }
|
mas01cr@239
|
174 if (one != 1) {
|
mas01cr@239
|
175 error("dimensionality of power file not 1", powerFileName);
|
mas01cr@239
|
176 }
|
mas01mc@324
|
177
|
mas01cr@239
|
178 // FIXME: should check that the powerfile is the right size for
|
mas01cr@239
|
179 // this. -- CSR, 2007-10-30
|
mas01cr@239
|
180 count = read(powerfd, powerdata, numVectors * sizeof(double));
|
mas01cr@239
|
181 if (count != numVectors * sizeof(double)) {
|
mas01cr@239
|
182 error("powerfd read failed", "double", "read");
|
mas01cr@239
|
183 }
|
mas01cr@239
|
184 }
|
mas01cr@239
|
185 }
|
mas01cr@239
|
186
|
mas01cr@239
|
187 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
188
|
mas01cr@239
|
189 forWrite = true;
|
mas01cr@239
|
190 initDBHeader(dbName);
|
mas01cr@239
|
191
|
mas01mc@324
|
192 // Treat large ADB instances differently
|
mas01mc@324
|
193 if( dbH->flags & O2_FLAG_LARGE_ADB ){
|
mas01mc@324
|
194 batchinsert_large_adb(dbName, inFile) ;
|
mas01mc@324
|
195 return;
|
mas01mc@324
|
196 }
|
mas01mc@324
|
197
|
mas01cr@239
|
198 if(!key)
|
mas01cr@239
|
199 key=inFile;
|
mas01cr@239
|
200 std::ifstream *filesIn = 0;
|
mas01cr@239
|
201 std::ifstream *keysIn = 0;
|
mas01cr@239
|
202 std::ifstream* thisTimesFile = 0;
|
mas01cr@239
|
203 int thispowerfd = 0;
|
mas01cr@239
|
204
|
mas01cr@239
|
205 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@239
|
206 error("Could not open batch in file", inFile);
|
mas01cr@239
|
207 if(key && key!=inFile)
|
mas01cr@239
|
208 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@239
|
209 error("Could not open batch key file",key);
|
mas01cr@239
|
210
|
mas01cr@239
|
211 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
212 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
213
|
mas01cr@239
|
214 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
215 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
216
|
mas01cr@239
|
217 unsigned totalVectors=0;
|
mas01cr@239
|
218 char *thisFile = new char[MAXSTR];
|
mas01cr@262
|
219 char *thisKey = 0;
|
mas01cr@262
|
220 if (key && (key != inFile)) {
|
mas01cr@262
|
221 thisKey = new char[MAXSTR];
|
mas01cr@262
|
222 }
|
mas01cr@239
|
223 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@239
|
224 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@302
|
225
|
mas01cr@302
|
226 std::set<std::string> s;
|
mas01cr@302
|
227
|
mas01cr@302
|
228 for (unsigned k = 0; k < dbH->numFiles; k++) {
|
mas01cr@302
|
229 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
|
mas01cr@302
|
230 }
|
mas01cr@302
|
231
|
mas01cr@302
|
232 do {
|
mas01cr@239
|
233 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@262
|
234 if(key && key!=inFile) {
|
mas01cr@239
|
235 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@262
|
236 } else {
|
mas01cr@239
|
237 thisKey = thisFile;
|
mas01cr@262
|
238 }
|
mas01cr@262
|
239 if(usingTimes) {
|
mas01cr@262
|
240 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@262
|
241 }
|
mas01cr@262
|
242 if(usingPower) {
|
mas01cr@239
|
243 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@262
|
244 }
|
mas01cr@239
|
245
|
mas01cr@262
|
246 if(filesIn->eof()) {
|
mas01cr@239
|
247 break;
|
mas01cr@262
|
248 }
|
mas01cr@239
|
249 initInputFile(thisFile);
|
mas01cr@239
|
250
|
mas01cr@251
|
251 if(!enough_per_file_space_free()) {
|
mas01cr@251
|
252 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01cr@251
|
253 }
|
mas01cr@251
|
254
|
mas01cr@239
|
255 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
256 error("batchinsert failed: no more room in database", thisFile);
|
mas01cr@239
|
257 }
|
mas01cr@239
|
258
|
mas01cr@302
|
259 if(s.count(thisKey)) {
|
mas01cr@239
|
260 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01cr@239
|
261 } else {
|
mas01cr@302
|
262 s.insert(thisKey);
|
mas01cr@239
|
263 // Make a track index table of features to file indexes
|
mas01cr@239
|
264 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
265 if(!numVectors) {
|
mas01cr@239
|
266 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01cr@239
|
267 }
|
mas01cr@239
|
268 else{
|
mas01cr@239
|
269 if(usingTimes){
|
mas01cr@239
|
270 if(timesFile->eof()) {
|
mas01cr@239
|
271 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@239
|
272 }
|
mas01cr@239
|
273 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01cr@239
|
274 if(!thisTimesFile->is_open()) {
|
mas01cr@239
|
275 error("Cannot open timestamp file", thisTimesFileName);
|
mas01cr@239
|
276 }
|
mas01cr@239
|
277 off_t insertoffset = dbH->length;
|
mas01cr@239
|
278 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
|
mas01cr@239
|
279 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
280 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
281 error("out of space for times", key);
|
mas01cr@239
|
282 }
|
mas01cr@239
|
283 insertTimeStamps(numVectors, thisTimesFile, timesdata);
|
mas01cr@239
|
284 if(thisTimesFile)
|
mas01cr@239
|
285 delete thisTimesFile;
|
mas01cr@239
|
286 }
|
mas01cr@239
|
287
|
mas01cr@239
|
288 if (usingPower) {
|
mas01cr@239
|
289 if(powerFile->eof()) {
|
mas01cr@239
|
290 error("not enough power files in powerList", powerFileName);
|
mas01cr@239
|
291 }
|
mas01cr@239
|
292 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01cr@239
|
293 if (thispowerfd < 0) {
|
mas01cr@239
|
294 error("failed to open power file", thisPowerFileName);
|
mas01cr@239
|
295 }
|
mas01cr@239
|
296 off_t insertoffset = dbH->length;
|
mas01cr@239
|
297 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
|
mas01cr@239
|
298 double *powerdata = powerTable + poweroffset;
|
mas01cr@239
|
299 insertPowerData(numVectors, thispowerfd, powerdata);
|
mas01cr@239
|
300 if (0 < thispowerfd) {
|
mas01cr@239
|
301 close(thispowerfd);
|
mas01cr@239
|
302 }
|
mas01cr@239
|
303 }
|
mas01mc@324
|
304
|
mas01mc@324
|
305 INSERT_FILETABLE_STRING(fileTable, thisKey);
|
mas01mc@324
|
306
|
mas01cr@239
|
307 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
308
|
mas01cr@239
|
309 // Increment file count
|
mas01cr@239
|
310 dbH->numFiles++;
|
mas01cr@239
|
311
|
mas01cr@239
|
312 // Update Header information
|
mas01cr@239
|
313 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
314
|
mas01cr@239
|
315 // Update track to file index map
|
mas01cr@239
|
316 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01mc@324
|
317
|
mas01cr@239
|
318 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
319
|
mas01cr@239
|
320 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
321 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
322 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
323
|
mas01cr@239
|
324 totalVectors+=numVectors;
|
mas01cr@239
|
325
|
mas01cr@239
|
326 // Copy the header back to the database
|
mas01cr@239
|
327 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
328 }
|
mas01cr@239
|
329 }
|
mas01cr@239
|
330 // CLEAN UP
|
mas01cr@239
|
331 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
332 close(infid);
|
mas01cr@239
|
333 } while(!filesIn->eof());
|
mas01cr@239
|
334
|
mas01cr@239
|
335 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01cr@262
|
336
|
mas01cr@262
|
337 delete [] thisPowerFileName;
|
mas01cr@262
|
338 if(key && (key != inFile)) {
|
mas01cr@262
|
339 delete [] thisKey;
|
mas01cr@262
|
340 }
|
mas01cr@262
|
341 delete [] thisFile;
|
mas01cr@262
|
342 delete [] thisTimesFileName;
|
mas01cr@239
|
343
|
mas01cr@262
|
344 delete filesIn;
|
mas01cr@262
|
345 delete keysIn;
|
mas01cr@262
|
346
|
mas01cr@239
|
347 // Report status
|
mas01cr@239
|
348 status(dbName);
|
mas01cr@239
|
349 }
|
mas01mc@324
|
350
|
mas01mc@324
|
351
|
mas01mc@324
|
352 // BATCHINSERT_LARGE_ADB
|
mas01mc@324
|
353 //
|
mas01mc@324
|
354 // This method inserts file pointers into the ADB instance rather than the actual feature data
|
mas01mc@324
|
355 //
|
mas01mc@324
|
356 // This method is intended for databases that are large enough to only support indexed query
|
mas01mc@324
|
357 // So exhaustive searching across all feature vectors will not be performed
|
mas01mc@324
|
358 //
|
mas01mc@324
|
359 // We insert featureFileName, [powerFileName], [timesFileName]
|
mas01mc@324
|
360 //
|
mas01mc@324
|
361 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
|
mas01mc@324
|
362 //
|
mas01mc@324
|
363 // LIMITS:
|
mas01mc@324
|
364 //
|
mas01mc@324
|
365 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
|
mas01mc@324
|
366 //
|
mas01mc@324
|
367 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
|
mas01mc@324
|
368
|
mas01mc@324
|
369 if(!key)
|
mas01mc@324
|
370 key=inFile;
|
mas01mc@324
|
371 std::ifstream *filesIn = 0;
|
mas01mc@324
|
372 std::ifstream *keysIn = 0;
|
mas01mc@324
|
373 std::ifstream* thisTimesFile = 0;
|
mas01mc@324
|
374 int thispowerfd = 0;
|
mas01mc@324
|
375
|
mas01mc@324
|
376 if(!(filesIn = new std::ifstream(inFile)))
|
mas01mc@324
|
377 error("Could not open batch in file", inFile);
|
mas01mc@324
|
378 if(key && key!=inFile)
|
mas01mc@324
|
379 if(!(keysIn = new std::ifstream(key)))
|
mas01mc@324
|
380 error("Could not open batch key file",key);
|
mas01mc@324
|
381
|
mas01mc@324
|
382 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01mc@324
|
383 error("Must use timestamps with timestamped database","use --times");
|
mas01mc@324
|
384
|
mas01mc@324
|
385 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01mc@324
|
386 error("Must use power with power-enabled database", dbName);
|
mas01mc@324
|
387
|
mas01mc@324
|
388 unsigned totalVectors=0;
|
mas01mc@324
|
389 char *thisFile = new char[MAXSTR];
|
mas01mc@324
|
390 char *thisKey = 0;
|
mas01mc@324
|
391 if (key && (key != inFile)) {
|
mas01mc@324
|
392 thisKey = new char[MAXSTR];
|
mas01mc@324
|
393 }
|
mas01mc@324
|
394 char *thisTimesFileName = new char[MAXSTR];
|
mas01mc@324
|
395 char *thisPowerFileName = new char[MAXSTR];
|
mas01mc@324
|
396
|
mas01mc@324
|
397 std::set<std::string> s;
|
mas01mc@324
|
398
|
mas01mc@324
|
399 for (unsigned k = 0; k < dbH->numFiles; k++) {
|
mas01mc@324
|
400 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
|
mas01mc@324
|
401 }
|
mas01mc@324
|
402
|
mas01mc@324
|
403 do {
|
mas01mc@324
|
404 filesIn->getline(thisFile,MAXSTR);
|
mas01mc@324
|
405 if(key && key!=inFile) {
|
mas01mc@324
|
406 keysIn->getline(thisKey,MAXSTR);
|
mas01mc@324
|
407 } else {
|
mas01mc@324
|
408 thisKey = thisFile;
|
mas01mc@324
|
409 }
|
mas01mc@324
|
410 if(usingTimes) {
|
mas01mc@324
|
411 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01mc@324
|
412 }
|
mas01mc@324
|
413 if(usingPower) {
|
mas01mc@324
|
414 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01mc@324
|
415 }
|
mas01mc@324
|
416
|
mas01mc@324
|
417 if(filesIn->eof()) {
|
mas01mc@324
|
418 break;
|
mas01mc@324
|
419 }
|
mas01mc@324
|
420
|
mas01mc@324
|
421 initInputFile(thisFile, false);
|
mas01mc@324
|
422
|
mas01mc@324
|
423 if(!enough_per_file_space_free()) {
|
mas01mc@324
|
424 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01mc@324
|
425 }
|
mas01mc@324
|
426
|
mas01mc@324
|
427 if(s.count(thisKey)) {
|
mas01mc@324
|
428 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01mc@324
|
429 } else {
|
mas01mc@324
|
430 s.insert(thisKey);
|
mas01mc@324
|
431 // Make a track index table of features to file indexes
|
mas01mc@324
|
432 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01mc@324
|
433 if(!numVectors) {
|
mas01mc@324
|
434 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01mc@324
|
435 }
|
mas01mc@324
|
436 else{
|
mas01mc@324
|
437 // Check that time-stamp file exists
|
mas01mc@324
|
438 if(usingTimes){
|
mas01mc@324
|
439 if(timesFile->eof()) {
|
mas01mc@324
|
440 error("not enough timestamp files in timesList", timesFileName);
|
mas01mc@324
|
441 }
|
mas01mc@324
|
442 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01mc@324
|
443 if(!thisTimesFile->is_open()) {
|
mas01mc@324
|
444 error("Cannot open timestamp file", thisTimesFileName);
|
mas01mc@324
|
445 }
|
mas01mc@324
|
446 if(thisTimesFile)
|
mas01mc@324
|
447 delete thisTimesFile;
|
mas01mc@324
|
448 }
|
mas01mc@324
|
449
|
mas01mc@324
|
450 // Check that power file exists
|
mas01mc@324
|
451 if (usingPower) {
|
mas01mc@324
|
452 if(powerFile->eof()) {
|
mas01mc@324
|
453 error("not enough power files in powerList", powerFileName);
|
mas01mc@324
|
454 }
|
mas01mc@324
|
455 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01mc@324
|
456 if (thispowerfd < 0) {
|
mas01mc@324
|
457 error("failed to open power file", thisPowerFileName);
|
mas01mc@324
|
458 }
|
mas01mc@324
|
459 if (0 < thispowerfd) {
|
mas01mc@324
|
460 close(thispowerfd);
|
mas01mc@324
|
461 }
|
mas01mc@324
|
462 }
|
mas01mc@324
|
463
|
mas01mc@324
|
464 // persist links to the feature files for reading from filesystem later
|
mas01mc@324
|
465
|
mas01mc@324
|
466 // Primary Keys
|
mas01mc@324
|
467 INSERT_FILETABLE_STRING(fileTable, thisKey);
|
mas01mc@324
|
468
|
mas01mc@324
|
469 // Feature Vector fileNames
|
mas01mc@324
|
470 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
|
mas01mc@324
|
471
|
mas01mc@324
|
472 // Time Stamp fileNames
|
mas01mc@324
|
473 if(usingTimes)
|
mas01mc@324
|
474 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
|
mas01mc@324
|
475
|
mas01mc@324
|
476
|
mas01mc@324
|
477 // Power fileNames
|
mas01mc@324
|
478 if(usingPower)
|
mas01mc@324
|
479 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
|
mas01mc@324
|
480
|
mas01mc@324
|
481 // Increment file count
|
mas01mc@324
|
482 dbH->numFiles++;
|
mas01mc@324
|
483
|
mas01mc@324
|
484 // Update Header information
|
mas01mc@324
|
485 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01mc@324
|
486
|
mas01mc@324
|
487 // Update track to file index map
|
mas01mc@324
|
488 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01mc@324
|
489
|
mas01mc@324
|
490 totalVectors+=numVectors;
|
mas01mc@324
|
491
|
mas01mc@324
|
492 // Copy the header back to the database
|
mas01mc@324
|
493 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01mc@324
|
494 }
|
mas01mc@324
|
495 }
|
mas01mc@324
|
496 // CLEAN UP
|
mas01mc@324
|
497 if(indata)
|
mas01mc@324
|
498 munmap(indata,statbuf.st_size);
|
mas01mc@324
|
499 if(infid>0)
|
mas01mc@324
|
500 close(infid);
|
mas01mc@324
|
501 } while(!filesIn->eof());
|
mas01mc@324
|
502
|
mas01mc@324
|
503 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01mc@324
|
504
|
mas01mc@324
|
505 delete [] thisPowerFileName;
|
mas01mc@324
|
506 if(key && (key != inFile)) {
|
mas01mc@324
|
507 delete [] thisKey;
|
mas01mc@324
|
508 }
|
mas01mc@324
|
509 delete [] thisFile;
|
mas01mc@324
|
510 delete [] thisTimesFileName;
|
mas01mc@324
|
511
|
mas01mc@324
|
512 delete filesIn;
|
mas01mc@324
|
513 delete keysIn;
|
mas01mc@324
|
514
|
mas01mc@324
|
515 // Report status
|
mas01mc@324
|
516 status(dbName);
|
mas01mc@324
|
517 }
|