mas01cr@239
|
1 #include "audioDB.h"
|
mas01cr@239
|
2
|
mas01cr@251
|
3 bool audioDB::enough_per_file_space_free() {
|
mas01cr@251
|
4 unsigned int fmaxfiles, tmaxfiles;
|
mas01cr@251
|
5 unsigned int maxfiles;
|
mas01cr@251
|
6
|
mas01cr@256
|
7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
|
mas01cr@256
|
8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
|
mas01cr@251
|
9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
|
mas01cr@251
|
10 return(dbH->numFiles < maxfiles);
|
mas01cr@251
|
11 }
|
mas01cr@251
|
12
|
mas01cr@239
|
13 bool audioDB::enough_data_space_free(off_t size) {
|
mas01mc@316
|
14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
|
mas01cr@239
|
15 }
|
mas01cr@239
|
16
|
mas01cr@239
|
17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
|
mas01cr@239
|
18 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
|
mas01cr@239
|
19 write(dbfid, buffer, size);
|
mas01cr@239
|
20 }
|
mas01cr@239
|
21
|
mas01cr@239
|
22 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
23 forWrite = true;
|
mas01cr@239
|
24 initTables(dbName, inFile);
|
mas01cr@239
|
25
|
mas01mc@316
|
26 if(dbH->flags & O2_FLAG_LARGE_ADB)
|
mas01mc@316
|
27 error("Single-feature inserts not allowed with LARGE audioDB instances");
|
mas01mc@316
|
28
|
mas01cr@239
|
29 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
30 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
31
|
mas01cr@239
|
32 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
33 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
34
|
mas01cr@251
|
35 if(!enough_per_file_space_free()) {
|
mas01cr@251
|
36 error("Insert failed: no more room for metadata", inFile);
|
mas01cr@251
|
37 }
|
mas01cr@251
|
38
|
mas01cr@239
|
39 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
40 error("Insert failed: no more room in database", inFile);
|
mas01cr@239
|
41 }
|
mas01cr@239
|
42
|
mas01cr@239
|
43 if(!key)
|
mas01cr@239
|
44 key=inFile;
|
mas01cr@239
|
45 // Linear scan of filenames check for pre-existing feature
|
mas01cr@239
|
46 unsigned alreadyInserted=0;
|
mas01cr@239
|
47 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@256
|
48 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
|
mas01cr@239
|
49 alreadyInserted=1;
|
mas01cr@239
|
50 break;
|
mas01cr@239
|
51 }
|
mas01cr@239
|
52
|
mas01cr@239
|
53 if(alreadyInserted) {
|
mas01cr@239
|
54 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
|
mas01mc@316
|
55 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
|
mas01cr@239
|
56 return;
|
mas01cr@239
|
57 }
|
mas01cr@239
|
58
|
mas01cr@239
|
59 // Make a track index table of features to file indexes
|
mas01cr@239
|
60 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
61 if(!numVectors) {
|
mas01cr@239
|
62 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
|
mas01cr@239
|
63
|
mas01cr@239
|
64 // CLEAN UP
|
mas01cr@239
|
65 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
66 munmap(db,dbH->dbSize);
|
mas01cr@239
|
67 close(infid);
|
mas01cr@239
|
68 return;
|
mas01cr@239
|
69 }
|
mas01cr@239
|
70
|
mas01mc@316
|
71 INSERT_FILETABLE_STRING(fileTable, key);
|
mas01cr@239
|
72
|
mas01cr@239
|
73 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
74
|
mas01cr@239
|
75 // Check times status and insert times from file
|
mas01cr@239
|
76 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
|
mas01cr@239
|
77 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
78
|
mas01cr@239
|
79 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
80 error("out of space for times", key);
|
mas01cr@239
|
81 }
|
mas01cr@239
|
82
|
mas01cr@239
|
83 if (usingTimes) {
|
mas01cr@239
|
84 insertTimeStamps(numVectors, timesFile, timesdata);
|
mas01cr@239
|
85 }
|
mas01cr@239
|
86
|
mas01cr@239
|
87 double *powerdata = powerTable + indexoffset;
|
mas01cr@239
|
88 insertPowerData(numVectors, powerfd, powerdata);
|
mas01cr@239
|
89
|
mas01cr@239
|
90 // Increment file count
|
mas01cr@239
|
91 dbH->numFiles++;
|
mas01cr@239
|
92
|
mas01cr@239
|
93 // Update Header information
|
mas01cr@239
|
94 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
95
|
mas01cr@239
|
96 // Update track to file index map
|
mas01cr@239
|
97 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
|
mas01cr@239
|
98
|
mas01cr@239
|
99 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
100
|
mas01cr@239
|
101 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
102 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
103 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
104
|
mas01cr@239
|
105 // Report status
|
mas01cr@239
|
106 status(dbName);
|
mas01cr@239
|
107 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
|
mas01cr@239
|
108
|
mas01cr@239
|
109 // Copy the header back to the database
|
mas01cr@239
|
110 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
111
|
mas01cr@239
|
112 // CLEAN UP
|
mas01cr@239
|
113 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
114 close(infid);
|
mas01cr@239
|
115 }
|
mas01cr@239
|
116
|
mas01cr@239
|
117 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
|
mas01cr@239
|
118 assert(usingTimes);
|
mas01cr@239
|
119
|
mas01cr@239
|
120 unsigned numtimes = 0;
|
mas01cr@239
|
121
|
mas01cr@239
|
122 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
|
mas01cr@239
|
123 dbH->flags=dbH->flags|O2_FLAG_TIMES;
|
mas01cr@239
|
124 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
|
mas01cr@239
|
125 error("Timestamp file used with non-timestamped database", timesFileName);
|
mas01cr@239
|
126 }
|
mas01cr@239
|
127
|
mas01cr@239
|
128 if(!timesFile->is_open()) {
|
mas01cr@239
|
129 error("problem opening times file on timestamped database", timesFileName);
|
mas01cr@239
|
130 }
|
mas01cr@239
|
131
|
mas01cr@239
|
132 double timepoint, next;
|
mas01cr@239
|
133 *timesFile >> timepoint;
|
mas01cr@239
|
134 if (timesFile->eof()) {
|
mas01cr@239
|
135 error("no entries in times file", timesFileName);
|
mas01cr@239
|
136 }
|
mas01cr@239
|
137 numtimes++;
|
mas01cr@239
|
138 do {
|
mas01cr@239
|
139 *timesFile >> next;
|
mas01cr@239
|
140 if (timesFile->eof()) {
|
mas01cr@239
|
141 break;
|
mas01cr@239
|
142 }
|
mas01cr@239
|
143 numtimes++;
|
mas01cr@239
|
144 timesdata[0] = timepoint;
|
mas01cr@239
|
145 timepoint = (timesdata[1] = next);
|
mas01cr@239
|
146 timesdata += 2;
|
mas01cr@239
|
147 } while (numtimes < numVectors + 1);
|
mas01cr@239
|
148
|
mas01cr@239
|
149 if (numtimes < numVectors + 1) {
|
mas01cr@239
|
150 error("too few timepoints in times file", timesFileName);
|
mas01cr@239
|
151 }
|
mas01cr@239
|
152
|
mas01cr@239
|
153 *timesFile >> next;
|
mas01cr@239
|
154 if (!timesFile->eof()) {
|
mas01cr@239
|
155 error("too many timepoints in times file", timesFileName);
|
mas01cr@239
|
156 }
|
mas01cr@239
|
157 }
|
mas01cr@239
|
158
|
mas01cr@239
|
159 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
|
mas01mc@320
|
160 if(usingPower){
|
mas01cr@239
|
161 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01cr@239
|
162 error("Cannot insert power data on non-power DB", dbName);
|
mas01cr@239
|
163 }
|
mas01mc@320
|
164
|
mas01cr@239
|
165 int one;
|
mas01cr@239
|
166 unsigned int count;
|
mas01mc@320
|
167
|
mas01cr@239
|
168 count = read(powerfd, &one, sizeof(unsigned int));
|
mas01cr@239
|
169 if (count != sizeof(unsigned int)) {
|
mas01cr@239
|
170 error("powerfd read failed", "int", "read");
|
mas01cr@239
|
171 }
|
mas01cr@239
|
172 if (one != 1) {
|
mas01cr@239
|
173 error("dimensionality of power file not 1", powerFileName);
|
mas01cr@239
|
174 }
|
mas01mc@320
|
175
|
mas01cr@239
|
176 // FIXME: should check that the powerfile is the right size for
|
mas01cr@239
|
177 // this. -- CSR, 2007-10-30
|
mas01cr@239
|
178 count = read(powerfd, powerdata, numVectors * sizeof(double));
|
mas01cr@239
|
179 if (count != numVectors * sizeof(double)) {
|
mas01cr@239
|
180 error("powerfd read failed", "double", "read");
|
mas01cr@239
|
181 }
|
mas01cr@239
|
182 }
|
mas01cr@239
|
183 }
|
mas01cr@239
|
184
|
mas01cr@239
|
185 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
186
|
mas01cr@239
|
187 forWrite = true;
|
mas01cr@239
|
188 initDBHeader(dbName);
|
mas01cr@239
|
189
|
mas01mc@316
|
190 // Treat large ADB instances differently
|
mas01mc@316
|
191 if( dbH->flags & O2_FLAG_LARGE_ADB ){
|
mas01mc@316
|
192 batchinsert_large_adb(dbName, inFile) ;
|
mas01mc@316
|
193 return;
|
mas01mc@316
|
194 }
|
mas01mc@316
|
195
|
mas01cr@239
|
196 if(!key)
|
mas01cr@239
|
197 key=inFile;
|
mas01cr@239
|
198 std::ifstream *filesIn = 0;
|
mas01cr@239
|
199 std::ifstream *keysIn = 0;
|
mas01cr@239
|
200 std::ifstream* thisTimesFile = 0;
|
mas01cr@239
|
201 int thispowerfd = 0;
|
mas01cr@239
|
202
|
mas01cr@239
|
203 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@239
|
204 error("Could not open batch in file", inFile);
|
mas01cr@239
|
205 if(key && key!=inFile)
|
mas01cr@239
|
206 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@239
|
207 error("Could not open batch key file",key);
|
mas01cr@239
|
208
|
mas01cr@239
|
209 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
210 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
211
|
mas01cr@239
|
212 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
213 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
214
|
mas01cr@239
|
215 unsigned totalVectors=0;
|
mas01cr@239
|
216 char *thisFile = new char[MAXSTR];
|
mas01cr@262
|
217 char *thisKey = 0;
|
mas01cr@262
|
218 if (key && (key != inFile)) {
|
mas01cr@262
|
219 thisKey = new char[MAXSTR];
|
mas01cr@262
|
220 }
|
mas01cr@239
|
221 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@239
|
222 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@302
|
223
|
mas01cr@302
|
224 std::set<std::string> s;
|
mas01cr@302
|
225
|
mas01cr@302
|
226 for (unsigned k = 0; k < dbH->numFiles; k++) {
|
mas01cr@302
|
227 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
|
mas01cr@302
|
228 }
|
mas01cr@302
|
229
|
mas01cr@302
|
230 do {
|
mas01cr@239
|
231 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@262
|
232 if(key && key!=inFile) {
|
mas01cr@239
|
233 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@262
|
234 } else {
|
mas01cr@239
|
235 thisKey = thisFile;
|
mas01cr@262
|
236 }
|
mas01cr@262
|
237 if(usingTimes) {
|
mas01cr@262
|
238 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@262
|
239 }
|
mas01cr@262
|
240 if(usingPower) {
|
mas01cr@239
|
241 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@262
|
242 }
|
mas01cr@239
|
243
|
mas01cr@262
|
244 if(filesIn->eof()) {
|
mas01cr@239
|
245 break;
|
mas01cr@262
|
246 }
|
mas01cr@239
|
247 initInputFile(thisFile);
|
mas01cr@239
|
248
|
mas01cr@251
|
249 if(!enough_per_file_space_free()) {
|
mas01cr@251
|
250 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01cr@251
|
251 }
|
mas01cr@251
|
252
|
mas01cr@239
|
253 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
254 error("batchinsert failed: no more room in database", thisFile);
|
mas01cr@239
|
255 }
|
mas01cr@239
|
256
|
mas01cr@302
|
257 if(s.count(thisKey)) {
|
mas01cr@239
|
258 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01cr@239
|
259 } else {
|
mas01cr@302
|
260 s.insert(thisKey);
|
mas01cr@239
|
261 // Make a track index table of features to file indexes
|
mas01cr@239
|
262 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
263 if(!numVectors) {
|
mas01cr@239
|
264 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01cr@239
|
265 }
|
mas01cr@239
|
266 else{
|
mas01cr@239
|
267 if(usingTimes){
|
mas01cr@239
|
268 if(timesFile->eof()) {
|
mas01cr@239
|
269 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@239
|
270 }
|
mas01cr@239
|
271 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01cr@239
|
272 if(!thisTimesFile->is_open()) {
|
mas01cr@239
|
273 error("Cannot open timestamp file", thisTimesFileName);
|
mas01cr@239
|
274 }
|
mas01cr@239
|
275 off_t insertoffset = dbH->length;
|
mas01cr@239
|
276 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
|
mas01cr@239
|
277 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
278 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
279 error("out of space for times", key);
|
mas01cr@239
|
280 }
|
mas01cr@239
|
281 insertTimeStamps(numVectors, thisTimesFile, timesdata);
|
mas01cr@239
|
282 if(thisTimesFile)
|
mas01cr@239
|
283 delete thisTimesFile;
|
mas01cr@239
|
284 }
|
mas01cr@239
|
285
|
mas01cr@239
|
286 if (usingPower) {
|
mas01cr@239
|
287 if(powerFile->eof()) {
|
mas01cr@239
|
288 error("not enough power files in powerList", powerFileName);
|
mas01cr@239
|
289 }
|
mas01cr@239
|
290 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01cr@239
|
291 if (thispowerfd < 0) {
|
mas01cr@239
|
292 error("failed to open power file", thisPowerFileName);
|
mas01cr@239
|
293 }
|
mas01cr@239
|
294 off_t insertoffset = dbH->length;
|
mas01cr@239
|
295 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
|
mas01cr@239
|
296 double *powerdata = powerTable + poweroffset;
|
mas01cr@239
|
297 insertPowerData(numVectors, thispowerfd, powerdata);
|
mas01cr@239
|
298 if (0 < thispowerfd) {
|
mas01cr@239
|
299 close(thispowerfd);
|
mas01cr@239
|
300 }
|
mas01cr@239
|
301 }
|
mas01mc@316
|
302
|
mas01mc@316
|
303 INSERT_FILETABLE_STRING(fileTable, thisKey);
|
mas01mc@316
|
304
|
mas01cr@239
|
305 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
306
|
mas01cr@239
|
307 // Increment file count
|
mas01cr@239
|
308 dbH->numFiles++;
|
mas01cr@239
|
309
|
mas01cr@239
|
310 // Update Header information
|
mas01cr@239
|
311 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
312
|
mas01cr@239
|
313 // Update track to file index map
|
mas01cr@239
|
314 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01mc@316
|
315
|
mas01cr@239
|
316 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
317
|
mas01cr@239
|
318 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
319 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
320 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
321
|
mas01cr@239
|
322 totalVectors+=numVectors;
|
mas01cr@239
|
323
|
mas01cr@239
|
324 // Copy the header back to the database
|
mas01cr@239
|
325 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
326 }
|
mas01cr@239
|
327 }
|
mas01cr@239
|
328 // CLEAN UP
|
mas01cr@239
|
329 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
330 close(infid);
|
mas01cr@239
|
331 } while(!filesIn->eof());
|
mas01cr@239
|
332
|
mas01cr@239
|
333 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01cr@262
|
334
|
mas01cr@262
|
335 delete [] thisPowerFileName;
|
mas01cr@262
|
336 if(key && (key != inFile)) {
|
mas01cr@262
|
337 delete [] thisKey;
|
mas01cr@262
|
338 }
|
mas01cr@262
|
339 delete [] thisFile;
|
mas01cr@262
|
340 delete [] thisTimesFileName;
|
mas01cr@239
|
341
|
mas01cr@262
|
342 delete filesIn;
|
mas01cr@262
|
343 delete keysIn;
|
mas01cr@262
|
344
|
mas01cr@239
|
345 // Report status
|
mas01cr@239
|
346 status(dbName);
|
mas01cr@239
|
347 }
|
mas01mc@316
|
348
|
mas01mc@316
|
349
|
mas01mc@316
|
350 // BATCHINSERT_LARGE_ADB
|
mas01mc@316
|
351 //
|
mas01mc@316
|
352 // This method inserts file pointers into the ADB instance rather than the actual feature data
|
mas01mc@316
|
353 //
|
mas01mc@316
|
354 // This method is intended for databases that are large enough to only support indexed query
|
mas01mc@316
|
355 // So exhaustive searching across all feature vectors will not be performed
|
mas01mc@316
|
356 //
|
mas01mc@316
|
357 // We insert featureFileName, [powerFileName], [timesFileName]
|
mas01mc@316
|
358 //
|
mas01mc@316
|
359 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
|
mas01mc@316
|
360 //
|
mas01mc@316
|
361 // LIMITS:
|
mas01mc@316
|
362 //
|
mas01mc@316
|
363 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
|
mas01mc@316
|
364 //
|
mas01mc@316
|
365 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
|
mas01mc@316
|
366
|
mas01mc@316
|
367 if(!key)
|
mas01mc@316
|
368 key=inFile;
|
mas01mc@316
|
369 std::ifstream *filesIn = 0;
|
mas01mc@316
|
370 std::ifstream *keysIn = 0;
|
mas01mc@316
|
371 std::ifstream* thisTimesFile = 0;
|
mas01mc@316
|
372 int thispowerfd = 0;
|
mas01mc@316
|
373
|
mas01mc@316
|
374 if(!(filesIn = new std::ifstream(inFile)))
|
mas01mc@316
|
375 error("Could not open batch in file", inFile);
|
mas01mc@316
|
376 if(key && key!=inFile)
|
mas01mc@316
|
377 if(!(keysIn = new std::ifstream(key)))
|
mas01mc@316
|
378 error("Could not open batch key file",key);
|
mas01mc@316
|
379
|
mas01mc@316
|
380 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01mc@316
|
381 error("Must use timestamps with timestamped database","use --times");
|
mas01mc@316
|
382
|
mas01mc@316
|
383 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01mc@316
|
384 error("Must use power with power-enabled database", dbName);
|
mas01mc@316
|
385
|
mas01mc@316
|
386 unsigned totalVectors=0;
|
mas01mc@316
|
387 char *thisFile = new char[MAXSTR];
|
mas01mc@316
|
388 char *thisKey = 0;
|
mas01mc@316
|
389 if (key && (key != inFile)) {
|
mas01mc@316
|
390 thisKey = new char[MAXSTR];
|
mas01mc@316
|
391 }
|
mas01mc@316
|
392 char *thisTimesFileName = new char[MAXSTR];
|
mas01mc@316
|
393 char *thisPowerFileName = new char[MAXSTR];
|
mas01mc@316
|
394
|
mas01mc@316
|
395 std::set<std::string> s;
|
mas01mc@316
|
396
|
mas01mc@316
|
397 for (unsigned k = 0; k < dbH->numFiles; k++) {
|
mas01mc@316
|
398 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
|
mas01mc@316
|
399 }
|
mas01mc@316
|
400
|
mas01mc@316
|
401 do {
|
mas01mc@316
|
402 filesIn->getline(thisFile,MAXSTR);
|
mas01mc@316
|
403 if(key && key!=inFile) {
|
mas01mc@316
|
404 keysIn->getline(thisKey,MAXSTR);
|
mas01mc@316
|
405 } else {
|
mas01mc@316
|
406 thisKey = thisFile;
|
mas01mc@316
|
407 }
|
mas01mc@316
|
408 if(usingTimes) {
|
mas01mc@316
|
409 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01mc@316
|
410 }
|
mas01mc@316
|
411 if(usingPower) {
|
mas01mc@316
|
412 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01mc@316
|
413 }
|
mas01mc@316
|
414
|
mas01mc@316
|
415 if(filesIn->eof()) {
|
mas01mc@316
|
416 break;
|
mas01mc@316
|
417 }
|
mas01mc@316
|
418
|
mas01mc@316
|
419 initInputFile(thisFile, false);
|
mas01mc@316
|
420
|
mas01mc@316
|
421 if(!enough_per_file_space_free()) {
|
mas01mc@316
|
422 error("batchinsert failed: no more room for metadata", thisFile);
|
mas01mc@316
|
423 }
|
mas01mc@316
|
424
|
mas01mc@316
|
425 if(s.count(thisKey)) {
|
mas01mc@316
|
426 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01mc@316
|
427 } else {
|
mas01mc@316
|
428 s.insert(thisKey);
|
mas01mc@316
|
429 // Make a track index table of features to file indexes
|
mas01mc@316
|
430 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01mc@316
|
431 if(!numVectors) {
|
mas01mc@316
|
432 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01mc@316
|
433 }
|
mas01mc@316
|
434 else{
|
mas01mc@316
|
435 // Check that time-stamp file exists
|
mas01mc@316
|
436 if(usingTimes){
|
mas01mc@316
|
437 if(timesFile->eof()) {
|
mas01mc@316
|
438 error("not enough timestamp files in timesList", timesFileName);
|
mas01mc@316
|
439 }
|
mas01mc@316
|
440 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01mc@316
|
441 if(!thisTimesFile->is_open()) {
|
mas01mc@316
|
442 error("Cannot open timestamp file", thisTimesFileName);
|
mas01mc@316
|
443 }
|
mas01mc@316
|
444 if(thisTimesFile)
|
mas01mc@316
|
445 delete thisTimesFile;
|
mas01mc@316
|
446 }
|
mas01mc@316
|
447
|
mas01mc@316
|
448 // Check that power file exists
|
mas01mc@316
|
449 if (usingPower) {
|
mas01mc@316
|
450 if(powerFile->eof()) {
|
mas01mc@316
|
451 error("not enough power files in powerList", powerFileName);
|
mas01mc@316
|
452 }
|
mas01mc@316
|
453 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01mc@316
|
454 if (thispowerfd < 0) {
|
mas01mc@316
|
455 error("failed to open power file", thisPowerFileName);
|
mas01mc@316
|
456 }
|
mas01mc@316
|
457 if (0 < thispowerfd) {
|
mas01mc@316
|
458 close(thispowerfd);
|
mas01mc@316
|
459 }
|
mas01mc@316
|
460 }
|
mas01mc@316
|
461
|
mas01mc@316
|
462 // persist links to the feature files for reading from filesystem later
|
mas01mc@316
|
463
|
mas01mc@316
|
464 // Primary Keys
|
mas01mc@316
|
465 INSERT_FILETABLE_STRING(fileTable, thisKey);
|
mas01mc@316
|
466
|
mas01mc@316
|
467 // Feature Vector fileNames
|
mas01mc@318
|
468 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
|
mas01mc@316
|
469
|
mas01mc@316
|
470 // Time Stamp fileNames
|
mas01mc@316
|
471 if(usingTimes)
|
mas01mc@318
|
472 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
|
mas01mc@316
|
473
|
mas01mc@316
|
474
|
mas01mc@316
|
475 // Power fileNames
|
mas01mc@316
|
476 if(usingPower)
|
mas01mc@318
|
477 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
|
mas01mc@316
|
478
|
mas01mc@316
|
479 // Increment file count
|
mas01mc@316
|
480 dbH->numFiles++;
|
mas01mc@316
|
481
|
mas01mc@316
|
482 // Update Header information
|
mas01mc@316
|
483 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01mc@316
|
484
|
mas01mc@316
|
485 // Update track to file index map
|
mas01mc@316
|
486 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01mc@316
|
487
|
mas01mc@316
|
488 totalVectors+=numVectors;
|
mas01mc@316
|
489
|
mas01mc@316
|
490 // Copy the header back to the database
|
mas01mc@316
|
491 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01mc@316
|
492 }
|
mas01mc@316
|
493 }
|
mas01mc@316
|
494 // CLEAN UP
|
mas01mc@321
|
495 if(indata)
|
mas01mc@321
|
496 munmap(indata,statbuf.st_size);
|
mas01mc@321
|
497 if(infid>0)
|
mas01mc@321
|
498 close(infid);
|
mas01mc@316
|
499 } while(!filesIn->eof());
|
mas01mc@316
|
500
|
mas01mc@316
|
501 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01mc@316
|
502
|
mas01mc@316
|
503 delete [] thisPowerFileName;
|
mas01mc@316
|
504 if(key && (key != inFile)) {
|
mas01mc@316
|
505 delete [] thisKey;
|
mas01mc@316
|
506 }
|
mas01mc@316
|
507 delete [] thisFile;
|
mas01mc@316
|
508 delete [] thisTimesFileName;
|
mas01mc@316
|
509
|
mas01mc@316
|
510 delete filesIn;
|
mas01mc@316
|
511 delete keysIn;
|
mas01mc@316
|
512
|
mas01mc@316
|
513 // Report status
|
mas01mc@316
|
514 status(dbName);
|
mas01mc@316
|
515 }
|