annotate insert.cpp @ 323:64c844de82d0 large_adb

Fixed an indexing bug where the rest of a track was ignored after the first shingle with power below threshold. Put the default O2_LSH_POINT_BITS back to 14 (16384 points); it can be altered at compile time with CFLAGS+=-DO2_LSH_POINT_BITS=n
author mas01mc
date Thu, 21 Aug 2008 21:02:14 +0000
parents da2272e029b3
children
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@251 3 bool audioDB::enough_per_file_space_free() {
mas01cr@251 4 unsigned int fmaxfiles, tmaxfiles;
mas01cr@251 5 unsigned int maxfiles;
mas01cr@251 6
mas01cr@256 7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
mas01cr@256 8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@251 9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@251 10 return(dbH->numFiles < maxfiles);
mas01cr@251 11 }
mas01cr@251 12
mas01cr@239 13 bool audioDB::enough_data_space_free(off_t size) {
mas01mc@316 14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@239 15 }
mas01cr@239 16
mas01cr@239 17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@239 18 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
mas01cr@239 19 write(dbfid, buffer, size);
mas01cr@239 20 }
mas01cr@239 21
mas01cr@239 22 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@239 23 forWrite = true;
mas01cr@239 24 initTables(dbName, inFile);
mas01cr@239 25
mas01mc@316 26 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@316 27 error("Single-feature inserts not allowed with LARGE audioDB instances");
mas01mc@316 28
mas01cr@239 29 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 30 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 31
mas01cr@239 32 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 33 error("Must use power with power-enabled database", dbName);
mas01cr@239 34
mas01cr@251 35 if(!enough_per_file_space_free()) {
mas01cr@251 36 error("Insert failed: no more room for metadata", inFile);
mas01cr@251 37 }
mas01cr@251 38
mas01cr@239 39 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 40 error("Insert failed: no more room in database", inFile);
mas01cr@239 41 }
mas01cr@239 42
mas01cr@239 43 if(!key)
mas01cr@239 44 key=inFile;
mas01cr@239 45 // Linear scan of filenames check for pre-existing feature
mas01cr@239 46 unsigned alreadyInserted=0;
mas01cr@239 47 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 48 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
mas01cr@239 49 alreadyInserted=1;
mas01cr@239 50 break;
mas01cr@239 51 }
mas01cr@239 52
mas01cr@239 53 if(alreadyInserted) {
mas01cr@239 54 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01mc@316 55 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
mas01cr@239 56 return;
mas01cr@239 57 }
mas01cr@239 58
mas01cr@239 59 // Make a track index table of features to file indexes
mas01cr@239 60 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 61 if(!numVectors) {
mas01cr@239 62 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@239 63
mas01cr@239 64 // CLEAN UP
mas01cr@239 65 munmap(indata,statbuf.st_size);
mas01cr@239 66 munmap(db,dbH->dbSize);
mas01cr@239 67 close(infid);
mas01cr@239 68 return;
mas01cr@239 69 }
mas01cr@239 70
mas01mc@316 71 INSERT_FILETABLE_STRING(fileTable, key);
mas01cr@239 72
mas01cr@239 73 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 74
mas01cr@239 75 // Check times status and insert times from file
mas01cr@239 76 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@239 77 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 78
mas01cr@239 79 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 80 error("out of space for times", key);
mas01cr@239 81 }
mas01cr@239 82
mas01cr@239 83 if (usingTimes) {
mas01cr@239 84 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@239 85 }
mas01cr@239 86
mas01cr@239 87 double *powerdata = powerTable + indexoffset;
mas01cr@239 88 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@239 89
mas01cr@239 90 // Increment file count
mas01cr@239 91 dbH->numFiles++;
mas01cr@239 92
mas01cr@239 93 // Update Header information
mas01cr@239 94 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 95
mas01cr@239 96 // Update track to file index map
mas01cr@239 97 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@239 98
mas01cr@239 99 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 100
mas01cr@239 101 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 102 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 103 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 104
mas01cr@239 105 // Report status
mas01cr@239 106 status(dbName);
mas01cr@239 107 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@239 108
mas01cr@239 109 // Copy the header back to the database
mas01cr@239 110 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 111
mas01cr@239 112 // CLEAN UP
mas01cr@239 113 munmap(indata,statbuf.st_size);
mas01cr@239 114 close(infid);
mas01cr@239 115 }
mas01cr@239 116
mas01cr@239 117 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@239 118 assert(usingTimes);
mas01cr@239 119
mas01cr@239 120 unsigned numtimes = 0;
mas01cr@239 121
mas01cr@239 122 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@239 123 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@239 124 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 125 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@239 126 }
mas01cr@239 127
mas01cr@239 128 if(!timesFile->is_open()) {
mas01cr@239 129 error("problem opening times file on timestamped database", timesFileName);
mas01cr@239 130 }
mas01cr@239 131
mas01cr@239 132 double timepoint, next;
mas01cr@239 133 *timesFile >> timepoint;
mas01cr@239 134 if (timesFile->eof()) {
mas01cr@239 135 error("no entries in times file", timesFileName);
mas01cr@239 136 }
mas01cr@239 137 numtimes++;
mas01cr@239 138 do {
mas01cr@239 139 *timesFile >> next;
mas01cr@239 140 if (timesFile->eof()) {
mas01cr@239 141 break;
mas01cr@239 142 }
mas01cr@239 143 numtimes++;
mas01cr@239 144 timesdata[0] = timepoint;
mas01cr@239 145 timepoint = (timesdata[1] = next);
mas01cr@239 146 timesdata += 2;
mas01cr@239 147 } while (numtimes < numVectors + 1);
mas01cr@239 148
mas01cr@239 149 if (numtimes < numVectors + 1) {
mas01cr@239 150 error("too few timepoints in times file", timesFileName);
mas01cr@239 151 }
mas01cr@239 152
mas01cr@239 153 *timesFile >> next;
mas01cr@239 154 if (!timesFile->eof()) {
mas01cr@239 155 error("too many timepoints in times file", timesFileName);
mas01cr@239 156 }
mas01cr@239 157 }
mas01cr@239 158
mas01cr@239 159 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01mc@320 160 if(usingPower){
mas01cr@239 161 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 162 error("Cannot insert power data on non-power DB", dbName);
mas01cr@239 163 }
mas01mc@320 164
mas01cr@239 165 int one;
mas01cr@239 166 unsigned int count;
mas01mc@320 167
mas01cr@239 168 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@239 169 if (count != sizeof(unsigned int)) {
mas01cr@239 170 error("powerfd read failed", "int", "read");
mas01cr@239 171 }
mas01cr@239 172 if (one != 1) {
mas01cr@239 173 error("dimensionality of power file not 1", powerFileName);
mas01cr@239 174 }
mas01mc@320 175
mas01cr@239 176 // FIXME: should check that the powerfile is the right size for
mas01cr@239 177 // this. -- CSR, 2007-10-30
mas01cr@239 178 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@239 179 if (count != numVectors * sizeof(double)) {
mas01cr@239 180 error("powerfd read failed", "double", "read");
mas01cr@239 181 }
mas01cr@239 182 }
mas01cr@239 183 }
mas01cr@239 184
mas01cr@239 185 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@239 186
mas01cr@239 187 forWrite = true;
mas01cr@239 188 initDBHeader(dbName);
mas01cr@239 189
mas01mc@316 190 // Treat large ADB instances differently
mas01mc@316 191 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@316 192 batchinsert_large_adb(dbName, inFile) ;
mas01mc@316 193 return;
mas01mc@316 194 }
mas01mc@316 195
mas01cr@239 196 if(!key)
mas01cr@239 197 key=inFile;
mas01cr@239 198 std::ifstream *filesIn = 0;
mas01cr@239 199 std::ifstream *keysIn = 0;
mas01cr@239 200 std::ifstream* thisTimesFile = 0;
mas01cr@239 201 int thispowerfd = 0;
mas01cr@239 202
mas01cr@239 203 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@239 204 error("Could not open batch in file", inFile);
mas01cr@239 205 if(key && key!=inFile)
mas01cr@239 206 if(!(keysIn = new std::ifstream(key)))
mas01cr@239 207 error("Could not open batch key file",key);
mas01cr@239 208
mas01cr@239 209 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 210 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 211
mas01cr@239 212 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 213 error("Must use power with power-enabled database", dbName);
mas01cr@239 214
mas01cr@239 215 unsigned totalVectors=0;
mas01cr@239 216 char *thisFile = new char[MAXSTR];
mas01cr@262 217 char *thisKey = 0;
mas01cr@262 218 if (key && (key != inFile)) {
mas01cr@262 219 thisKey = new char[MAXSTR];
mas01cr@262 220 }
mas01cr@239 221 char *thisTimesFileName = new char[MAXSTR];
mas01cr@239 222 char *thisPowerFileName = new char[MAXSTR];
mas01cr@302 223
mas01cr@302 224 std::set<std::string> s;
mas01cr@302 225
mas01cr@302 226 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@302 227 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01cr@302 228 }
mas01cr@302 229
mas01cr@302 230 do {
mas01cr@239 231 filesIn->getline(thisFile,MAXSTR);
mas01cr@262 232 if(key && key!=inFile) {
mas01cr@239 233 keysIn->getline(thisKey,MAXSTR);
mas01cr@262 234 } else {
mas01cr@239 235 thisKey = thisFile;
mas01cr@262 236 }
mas01cr@262 237 if(usingTimes) {
mas01cr@262 238 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@262 239 }
mas01cr@262 240 if(usingPower) {
mas01cr@239 241 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@262 242 }
mas01cr@239 243
mas01cr@262 244 if(filesIn->eof()) {
mas01cr@239 245 break;
mas01cr@262 246 }
mas01cr@239 247 initInputFile(thisFile);
mas01cr@239 248
mas01cr@251 249 if(!enough_per_file_space_free()) {
mas01cr@251 250 error("batchinsert failed: no more room for metadata", thisFile);
mas01cr@251 251 }
mas01cr@251 252
mas01cr@239 253 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 254 error("batchinsert failed: no more room in database", thisFile);
mas01cr@239 255 }
mas01cr@239 256
mas01cr@302 257 if(s.count(thisKey)) {
mas01cr@239 258 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@239 259 } else {
mas01cr@302 260 s.insert(thisKey);
mas01cr@239 261 // Make a track index table of features to file indexes
mas01cr@239 262 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 263 if(!numVectors) {
mas01cr@239 264 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@239 265 }
mas01cr@239 266 else{
mas01cr@239 267 if(usingTimes){
mas01cr@239 268 if(timesFile->eof()) {
mas01cr@239 269 error("not enough timestamp files in timesList", timesFileName);
mas01cr@239 270 }
mas01cr@239 271 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@239 272 if(!thisTimesFile->is_open()) {
mas01cr@239 273 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@239 274 }
mas01cr@239 275 off_t insertoffset = dbH->length;
mas01cr@239 276 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@239 277 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 278 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 279 error("out of space for times", key);
mas01cr@239 280 }
mas01cr@239 281 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@239 282 if(thisTimesFile)
mas01cr@239 283 delete thisTimesFile;
mas01cr@239 284 }
mas01cr@239 285
mas01cr@239 286 if (usingPower) {
mas01cr@239 287 if(powerFile->eof()) {
mas01cr@239 288 error("not enough power files in powerList", powerFileName);
mas01cr@239 289 }
mas01cr@239 290 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@239 291 if (thispowerfd < 0) {
mas01cr@239 292 error("failed to open power file", thisPowerFileName);
mas01cr@239 293 }
mas01cr@239 294 off_t insertoffset = dbH->length;
mas01cr@239 295 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@239 296 double *powerdata = powerTable + poweroffset;
mas01cr@239 297 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@239 298 if (0 < thispowerfd) {
mas01cr@239 299 close(thispowerfd);
mas01cr@239 300 }
mas01cr@239 301 }
mas01mc@316 302
mas01mc@316 303 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@316 304
mas01cr@239 305 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 306
mas01cr@239 307 // Increment file count
mas01cr@239 308 dbH->numFiles++;
mas01cr@239 309
mas01cr@239 310 // Update Header information
mas01cr@239 311 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 312
mas01cr@239 313 // Update track to file index map
mas01cr@239 314 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@316 315
mas01cr@239 316 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 317
mas01cr@239 318 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 319 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 320 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 321
mas01cr@239 322 totalVectors+=numVectors;
mas01cr@239 323
mas01cr@239 324 // Copy the header back to the database
mas01cr@239 325 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 326 }
mas01cr@239 327 }
mas01cr@239 328 // CLEAN UP
mas01cr@239 329 munmap(indata,statbuf.st_size);
mas01cr@239 330 close(infid);
mas01cr@239 331 } while(!filesIn->eof());
mas01cr@239 332
mas01cr@239 333 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@262 334
mas01cr@262 335 delete [] thisPowerFileName;
mas01cr@262 336 if(key && (key != inFile)) {
mas01cr@262 337 delete [] thisKey;
mas01cr@262 338 }
mas01cr@262 339 delete [] thisFile;
mas01cr@262 340 delete [] thisTimesFileName;
mas01cr@239 341
mas01cr@262 342 delete filesIn;
mas01cr@262 343 delete keysIn;
mas01cr@262 344
mas01cr@239 345 // Report status
mas01cr@239 346 status(dbName);
mas01cr@239 347 }
mas01mc@316 348
mas01mc@316 349
mas01mc@316 350 // BATCHINSERT_LARGE_ADB
mas01mc@316 351 //
mas01mc@316 352 // This method inserts file pointers into the ADB instance rather than the actual feature data
mas01mc@316 353 //
mas01mc@316 354 // This method is intended for databases that are large enough to only support indexed query
mas01mc@316 355 // So exhaustive searching across all feature vectors will not be performed
mas01mc@316 356 //
mas01mc@316 357 // We insert featureFileName, [powerFileName], [timesFileName]
mas01mc@316 358 //
mas01mc@316 359 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
mas01mc@316 360 //
mas01mc@316 361 // LIMITS:
mas01mc@316 362 //
mas01mc@316 363 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
mas01mc@316 364 //
mas01mc@316 365 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
mas01mc@316 366
mas01mc@316 367 if(!key)
mas01mc@316 368 key=inFile;
mas01mc@316 369 std::ifstream *filesIn = 0;
mas01mc@316 370 std::ifstream *keysIn = 0;
mas01mc@316 371 std::ifstream* thisTimesFile = 0;
mas01mc@316 372 int thispowerfd = 0;
mas01mc@316 373
mas01mc@316 374 if(!(filesIn = new std::ifstream(inFile)))
mas01mc@316 375 error("Could not open batch in file", inFile);
mas01mc@316 376 if(key && key!=inFile)
mas01mc@316 377 if(!(keysIn = new std::ifstream(key)))
mas01mc@316 378 error("Could not open batch key file",key);
mas01mc@316 379
mas01mc@316 380 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@316 381 error("Must use timestamps with timestamped database","use --times");
mas01mc@316 382
mas01mc@316 383 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01mc@316 384 error("Must use power with power-enabled database", dbName);
mas01mc@316 385
mas01mc@316 386 unsigned totalVectors=0;
mas01mc@316 387 char *thisFile = new char[MAXSTR];
mas01mc@316 388 char *thisKey = 0;
mas01mc@316 389 if (key && (key != inFile)) {
mas01mc@316 390 thisKey = new char[MAXSTR];
mas01mc@316 391 }
mas01mc@316 392 char *thisTimesFileName = new char[MAXSTR];
mas01mc@316 393 char *thisPowerFileName = new char[MAXSTR];
mas01mc@316 394
mas01mc@316 395 std::set<std::string> s;
mas01mc@316 396
mas01mc@316 397 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01mc@316 398 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01mc@316 399 }
mas01mc@316 400
mas01mc@316 401 do {
mas01mc@316 402 filesIn->getline(thisFile,MAXSTR);
mas01mc@316 403 if(key && key!=inFile) {
mas01mc@316 404 keysIn->getline(thisKey,MAXSTR);
mas01mc@316 405 } else {
mas01mc@316 406 thisKey = thisFile;
mas01mc@316 407 }
mas01mc@316 408 if(usingTimes) {
mas01mc@316 409 timesFile->getline(thisTimesFileName,MAXSTR);
mas01mc@316 410 }
mas01mc@316 411 if(usingPower) {
mas01mc@316 412 powerFile->getline(thisPowerFileName, MAXSTR);
mas01mc@316 413 }
mas01mc@316 414
mas01mc@316 415 if(filesIn->eof()) {
mas01mc@316 416 break;
mas01mc@316 417 }
mas01mc@316 418
mas01mc@316 419 initInputFile(thisFile, false);
mas01mc@316 420
mas01mc@316 421 if(!enough_per_file_space_free()) {
mas01mc@316 422 error("batchinsert failed: no more room for metadata", thisFile);
mas01mc@316 423 }
mas01mc@316 424
mas01mc@316 425 if(s.count(thisKey)) {
mas01mc@316 426 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01mc@316 427 } else {
mas01mc@316 428 s.insert(thisKey);
mas01mc@316 429 // Make a track index table of features to file indexes
mas01mc@316 430 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@316 431 if(!numVectors) {
mas01mc@316 432 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01mc@316 433 }
mas01mc@316 434 else{
mas01mc@316 435 // Check that time-stamp file exists
mas01mc@316 436 if(usingTimes){
mas01mc@316 437 if(timesFile->eof()) {
mas01mc@316 438 error("not enough timestamp files in timesList", timesFileName);
mas01mc@316 439 }
mas01mc@316 440 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01mc@316 441 if(!thisTimesFile->is_open()) {
mas01mc@316 442 error("Cannot open timestamp file", thisTimesFileName);
mas01mc@316 443 }
mas01mc@316 444 if(thisTimesFile)
mas01mc@316 445 delete thisTimesFile;
mas01mc@316 446 }
mas01mc@316 447
mas01mc@316 448 // Check that power file exists
mas01mc@316 449 if (usingPower) {
mas01mc@316 450 if(powerFile->eof()) {
mas01mc@316 451 error("not enough power files in powerList", powerFileName);
mas01mc@316 452 }
mas01mc@316 453 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01mc@316 454 if (thispowerfd < 0) {
mas01mc@316 455 error("failed to open power file", thisPowerFileName);
mas01mc@316 456 }
mas01mc@316 457 if (0 < thispowerfd) {
mas01mc@316 458 close(thispowerfd);
mas01mc@316 459 }
mas01mc@316 460 }
mas01mc@316 461
mas01mc@316 462 // persist links to the feature files for reading from filesystem later
mas01mc@316 463
mas01mc@316 464 // Primary Keys
mas01mc@316 465 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@316 466
mas01mc@316 467 // Feature Vector fileNames
mas01mc@318 468 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
mas01mc@316 469
mas01mc@316 470 // Time Stamp fileNames
mas01mc@316 471 if(usingTimes)
mas01mc@318 472 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
mas01mc@316 473
mas01mc@316 474
mas01mc@316 475 // Power fileNames
mas01mc@316 476 if(usingPower)
mas01mc@318 477 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
mas01mc@316 478
mas01mc@316 479 // Increment file count
mas01mc@316 480 dbH->numFiles++;
mas01mc@316 481
mas01mc@316 482 // Update Header information
mas01mc@316 483 dbH->length+=(statbuf.st_size-sizeof(int));
mas01mc@316 484
mas01mc@316 485 // Update track to file index map
mas01mc@316 486 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@316 487
mas01mc@316 488 totalVectors+=numVectors;
mas01mc@316 489
mas01mc@316 490 // Copy the header back to the database
mas01mc@316 491 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01mc@316 492 }
mas01mc@316 493 }
mas01mc@316 494 // CLEAN UP
mas01mc@321 495 if(indata)
mas01mc@321 496 munmap(indata,statbuf.st_size);
mas01mc@321 497 if(infid>0)
mas01mc@321 498 close(infid);
mas01mc@316 499 } while(!filesIn->eof());
mas01mc@316 500
mas01mc@316 501 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01mc@316 502
mas01mc@316 503 delete [] thisPowerFileName;
mas01mc@316 504 if(key && (key != inFile)) {
mas01mc@316 505 delete [] thisKey;
mas01mc@316 506 }
mas01mc@316 507 delete [] thisFile;
mas01mc@316 508 delete [] thisTimesFileName;
mas01mc@316 509
mas01mc@316 510 delete filesIn;
mas01mc@316 511 delete keysIn;
mas01mc@316 512
mas01mc@316 513 // Report status
mas01mc@316 514 status(dbName);
mas01mc@316 515 }