annotate insert.cpp @ 370:2d5c3f8e8c22

Merge gcc-4.3-cleanups branch (-r629:642, but I consider that branch as having served its purpose) onto the trunk. Now compiles cleanly even with a fairly picky gcc. I await version 4.4 with bated breath.
author mas01cr
date Wed, 12 Nov 2008 15:40:40 +0000
parents c93be2f3a674
children 61b40ed4dc62
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@251 3 bool audioDB::enough_per_file_space_free() {
mas01cr@251 4 unsigned int fmaxfiles, tmaxfiles;
mas01cr@251 5 unsigned int maxfiles;
mas01cr@251 6
mas01cr@256 7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
mas01cr@256 8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@251 9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@251 10 return(dbH->numFiles < maxfiles);
mas01cr@251 11 }
mas01cr@251 12
mas01cr@239 13 bool audioDB::enough_data_space_free(off_t size) {
mas01mc@324 14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@239 15 }
mas01cr@239 16
mas01cr@239 17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@370 18 if(lseek(dbfid, dbH->dataOffset + offset, SEEK_SET) == (off_t) -1) {
mas01cr@370 19 error("error seeking to offset", "", "lseek");
mas01cr@370 20 }
mas01cr@370 21 CHECKED_WRITE(dbfid, buffer, size);
mas01cr@239 22 }
mas01cr@239 23
mas01cr@239 24 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@239 25 forWrite = true;
mas01cr@239 26 initTables(dbName, inFile);
mas01cr@239 27
mas01mc@324 28 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@324 29 error("Single-feature inserts not allowed with LARGE audioDB instances");
mas01mc@324 30
mas01cr@239 31 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 32 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 33
mas01cr@239 34 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 35 error("Must use power with power-enabled database", dbName);
mas01cr@239 36
mas01cr@251 37 if(!enough_per_file_space_free()) {
mas01cr@251 38 error("Insert failed: no more room for metadata", inFile);
mas01cr@251 39 }
mas01cr@251 40
mas01cr@239 41 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 42 error("Insert failed: no more room in database", inFile);
mas01cr@239 43 }
mas01cr@239 44
mas01cr@239 45 if(!key)
mas01cr@239 46 key=inFile;
mas01cr@239 47 // Linear scan of filenames check for pre-existing feature
mas01cr@239 48 unsigned alreadyInserted=0;
mas01cr@239 49 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 50 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
mas01cr@239 51 alreadyInserted=1;
mas01cr@239 52 break;
mas01cr@239 53 }
mas01cr@239 54
mas01cr@239 55 if(alreadyInserted) {
mas01cr@239 56 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01mc@324 57 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
mas01cr@239 58 return;
mas01cr@239 59 }
mas01cr@239 60
mas01cr@239 61 // Make a track index table of features to file indexes
mas01cr@239 62 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 63 if(!numVectors) {
mas01cr@239 64 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@239 65
mas01cr@239 66 // CLEAN UP
mas01cr@239 67 munmap(indata,statbuf.st_size);
mas01cr@239 68 munmap(db,dbH->dbSize);
mas01cr@239 69 close(infid);
mas01cr@239 70 return;
mas01cr@239 71 }
mas01cr@239 72
mas01mc@324 73 INSERT_FILETABLE_STRING(fileTable, key);
mas01cr@239 74
mas01cr@239 75 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 76
mas01cr@239 77 // Check times status and insert times from file
mas01cr@239 78 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@239 79 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 80
mas01cr@239 81 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 82 error("out of space for times", key);
mas01cr@239 83 }
mas01cr@239 84
mas01cr@239 85 if (usingTimes) {
mas01cr@239 86 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@239 87 }
mas01cr@239 88
mas01cr@239 89 double *powerdata = powerTable + indexoffset;
mas01cr@239 90 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@239 91
mas01cr@239 92 // Increment file count
mas01cr@239 93 dbH->numFiles++;
mas01cr@239 94
mas01cr@239 95 // Update Header information
mas01cr@239 96 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 97
mas01cr@239 98 // Update track to file index map
mas01cr@239 99 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@239 100
mas01cr@239 101 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 102
mas01cr@239 103 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 104 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 105 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 106
mas01cr@239 107 // Report status
mas01cr@239 108 status(dbName);
mas01cr@239 109 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@239 110
mas01cr@239 111 // Copy the header back to the database
mas01cr@239 112 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 113
mas01cr@239 114 // CLEAN UP
mas01cr@239 115 munmap(indata,statbuf.st_size);
mas01cr@239 116 close(infid);
mas01cr@239 117 }
mas01cr@239 118
mas01cr@239 119 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@239 120 assert(usingTimes);
mas01cr@239 121
mas01cr@239 122 unsigned numtimes = 0;
mas01cr@239 123
mas01cr@239 124 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@239 125 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@239 126 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 127 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@239 128 }
mas01cr@239 129
mas01cr@239 130 if(!timesFile->is_open()) {
mas01cr@239 131 error("problem opening times file on timestamped database", timesFileName);
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 double timepoint, next;
mas01cr@239 135 *timesFile >> timepoint;
mas01cr@239 136 if (timesFile->eof()) {
mas01cr@239 137 error("no entries in times file", timesFileName);
mas01cr@239 138 }
mas01cr@239 139 numtimes++;
mas01cr@239 140 do {
mas01cr@239 141 *timesFile >> next;
mas01cr@239 142 if (timesFile->eof()) {
mas01cr@239 143 break;
mas01cr@239 144 }
mas01cr@239 145 numtimes++;
mas01cr@239 146 timesdata[0] = timepoint;
mas01cr@239 147 timepoint = (timesdata[1] = next);
mas01cr@239 148 timesdata += 2;
mas01cr@239 149 } while (numtimes < numVectors + 1);
mas01cr@239 150
mas01cr@239 151 if (numtimes < numVectors + 1) {
mas01cr@239 152 error("too few timepoints in times file", timesFileName);
mas01cr@239 153 }
mas01cr@239 154
mas01cr@239 155 *timesFile >> next;
mas01cr@239 156 if (!timesFile->eof()) {
mas01cr@239 157 error("too many timepoints in times file", timesFileName);
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160
mas01cr@239 161 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01mc@324 162 if(usingPower){
mas01cr@239 163 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 164 error("Cannot insert power data on non-power DB", dbName);
mas01cr@239 165 }
mas01mc@324 166
mas01cr@239 167 int one;
mas01cr@239 168 unsigned int count;
mas01mc@324 169
mas01cr@239 170 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@239 171 if (count != sizeof(unsigned int)) {
mas01cr@239 172 error("powerfd read failed", "int", "read");
mas01cr@239 173 }
mas01cr@239 174 if (one != 1) {
mas01cr@239 175 error("dimensionality of power file not 1", powerFileName);
mas01cr@239 176 }
mas01mc@324 177
mas01cr@239 178 // FIXME: should check that the powerfile is the right size for
mas01cr@239 179 // this. -- CSR, 2007-10-30
mas01cr@239 180 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@239 181 if (count != numVectors * sizeof(double)) {
mas01cr@239 182 error("powerfd read failed", "double", "read");
mas01cr@239 183 }
mas01cr@239 184 }
mas01cr@239 185 }
mas01cr@239 186
mas01cr@239 187 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@239 188
mas01cr@239 189 forWrite = true;
mas01cr@239 190 initDBHeader(dbName);
mas01cr@239 191
mas01mc@324 192 // Treat large ADB instances differently
mas01mc@324 193 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 194 batchinsert_large_adb(dbName, inFile) ;
mas01mc@324 195 return;
mas01mc@324 196 }
mas01mc@324 197
mas01cr@239 198 if(!key)
mas01cr@239 199 key=inFile;
mas01cr@239 200 std::ifstream *filesIn = 0;
mas01cr@239 201 std::ifstream *keysIn = 0;
mas01cr@239 202 std::ifstream* thisTimesFile = 0;
mas01cr@239 203 int thispowerfd = 0;
mas01cr@239 204
mas01cr@239 205 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@239 206 error("Could not open batch in file", inFile);
mas01cr@239 207 if(key && key!=inFile)
mas01cr@239 208 if(!(keysIn = new std::ifstream(key)))
mas01cr@239 209 error("Could not open batch key file",key);
mas01cr@239 210
mas01cr@239 211 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 212 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 213
mas01cr@239 214 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 215 error("Must use power with power-enabled database", dbName);
mas01cr@239 216
mas01cr@239 217 unsigned totalVectors=0;
mas01cr@239 218 char *thisFile = new char[MAXSTR];
mas01cr@262 219 char *thisKey = 0;
mas01cr@262 220 if (key && (key != inFile)) {
mas01cr@262 221 thisKey = new char[MAXSTR];
mas01cr@262 222 }
mas01cr@239 223 char *thisTimesFileName = new char[MAXSTR];
mas01cr@239 224 char *thisPowerFileName = new char[MAXSTR];
mas01cr@302 225
mas01cr@302 226 std::set<std::string> s;
mas01cr@302 227
mas01cr@302 228 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@302 229 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01cr@302 230 }
mas01cr@302 231
mas01cr@302 232 do {
mas01cr@239 233 filesIn->getline(thisFile,MAXSTR);
mas01cr@262 234 if(key && key!=inFile) {
mas01cr@239 235 keysIn->getline(thisKey,MAXSTR);
mas01cr@262 236 } else {
mas01cr@239 237 thisKey = thisFile;
mas01cr@262 238 }
mas01cr@262 239 if(usingTimes) {
mas01cr@262 240 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@262 241 }
mas01cr@262 242 if(usingPower) {
mas01cr@239 243 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@262 244 }
mas01cr@239 245
mas01cr@262 246 if(filesIn->eof()) {
mas01cr@239 247 break;
mas01cr@262 248 }
mas01cr@239 249 initInputFile(thisFile);
mas01cr@239 250
mas01cr@251 251 if(!enough_per_file_space_free()) {
mas01cr@251 252 error("batchinsert failed: no more room for metadata", thisFile);
mas01cr@251 253 }
mas01cr@251 254
mas01cr@239 255 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 256 error("batchinsert failed: no more room in database", thisFile);
mas01cr@239 257 }
mas01cr@239 258
mas01cr@302 259 if(s.count(thisKey)) {
mas01cr@239 260 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@239 261 } else {
mas01cr@302 262 s.insert(thisKey);
mas01cr@239 263 // Make a track index table of features to file indexes
mas01cr@239 264 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 265 if(!numVectors) {
mas01cr@239 266 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@239 267 }
mas01cr@239 268 else{
mas01cr@239 269 if(usingTimes){
mas01cr@239 270 if(timesFile->eof()) {
mas01cr@239 271 error("not enough timestamp files in timesList", timesFileName);
mas01cr@239 272 }
mas01cr@239 273 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@239 274 if(!thisTimesFile->is_open()) {
mas01cr@239 275 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@239 276 }
mas01cr@239 277 off_t insertoffset = dbH->length;
mas01cr@239 278 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@239 279 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 280 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 281 error("out of space for times", key);
mas01cr@239 282 }
mas01cr@239 283 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@239 284 if(thisTimesFile)
mas01cr@239 285 delete thisTimesFile;
mas01cr@239 286 }
mas01cr@239 287
mas01cr@239 288 if (usingPower) {
mas01cr@239 289 if(powerFile->eof()) {
mas01cr@239 290 error("not enough power files in powerList", powerFileName);
mas01cr@239 291 }
mas01cr@239 292 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@239 293 if (thispowerfd < 0) {
mas01cr@239 294 error("failed to open power file", thisPowerFileName);
mas01cr@239 295 }
mas01cr@239 296 off_t insertoffset = dbH->length;
mas01cr@239 297 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@239 298 double *powerdata = powerTable + poweroffset;
mas01cr@239 299 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@239 300 if (0 < thispowerfd) {
mas01cr@239 301 close(thispowerfd);
mas01cr@239 302 }
mas01cr@239 303 }
mas01mc@324 304
mas01mc@324 305 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@324 306
mas01cr@239 307 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 308
mas01cr@239 309 // Increment file count
mas01cr@239 310 dbH->numFiles++;
mas01cr@239 311
mas01cr@239 312 // Update Header information
mas01cr@239 313 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 314
mas01cr@239 315 // Update track to file index map
mas01cr@239 316 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 317
mas01cr@239 318 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 319
mas01cr@239 320 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 321 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 322 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 323
mas01cr@239 324 totalVectors+=numVectors;
mas01cr@239 325
mas01cr@239 326 // Copy the header back to the database
mas01cr@239 327 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 328 }
mas01cr@239 329 }
mas01cr@239 330 // CLEAN UP
mas01cr@239 331 munmap(indata,statbuf.st_size);
mas01cr@239 332 close(infid);
mas01cr@239 333 } while(!filesIn->eof());
mas01cr@239 334
mas01cr@239 335 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@262 336
mas01cr@262 337 delete [] thisPowerFileName;
mas01cr@262 338 if(key && (key != inFile)) {
mas01cr@262 339 delete [] thisKey;
mas01cr@262 340 }
mas01cr@262 341 delete [] thisFile;
mas01cr@262 342 delete [] thisTimesFileName;
mas01cr@239 343
mas01cr@262 344 delete filesIn;
mas01cr@262 345 delete keysIn;
mas01cr@262 346
mas01cr@239 347 // Report status
mas01cr@239 348 status(dbName);
mas01cr@239 349 }
mas01mc@324 350
mas01mc@324 351
mas01mc@324 352 // BATCHINSERT_LARGE_ADB
mas01mc@324 353 //
mas01mc@324 354 // This method inserts file pointers into the ADB instance rather than the actual feature data
mas01mc@324 355 //
mas01mc@324 356 // This method is intended for databases that are large enough to only support indexed query
mas01mc@324 357 // So exhaustive searching across all feature vectors will not be performed
mas01mc@324 358 //
mas01mc@324 359 // We insert featureFileName, [powerFileName], [timesFileName]
mas01mc@324 360 //
mas01mc@324 361 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
mas01mc@324 362 //
mas01mc@324 363 // LIMITS:
mas01mc@324 364 //
mas01mc@324 365 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
mas01mc@324 366 //
mas01mc@324 367 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
mas01mc@324 368
mas01mc@324 369 if(!key)
mas01mc@324 370 key=inFile;
mas01mc@324 371 std::ifstream *filesIn = 0;
mas01mc@324 372 std::ifstream *keysIn = 0;
mas01mc@324 373 std::ifstream* thisTimesFile = 0;
mas01mc@324 374 int thispowerfd = 0;
mas01mc@324 375
mas01mc@324 376 if(!(filesIn = new std::ifstream(inFile)))
mas01mc@324 377 error("Could not open batch in file", inFile);
mas01mc@324 378 if(key && key!=inFile)
mas01mc@324 379 if(!(keysIn = new std::ifstream(key)))
mas01mc@324 380 error("Could not open batch key file",key);
mas01mc@324 381
mas01mc@324 382 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@324 383 error("Must use timestamps with timestamped database","use --times");
mas01mc@324 384
mas01mc@324 385 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01mc@324 386 error("Must use power with power-enabled database", dbName);
mas01mc@324 387
mas01mc@324 388 unsigned totalVectors=0;
mas01mc@324 389 char *thisFile = new char[MAXSTR];
mas01mc@324 390 char *thisKey = 0;
mas01mc@324 391 if (key && (key != inFile)) {
mas01mc@324 392 thisKey = new char[MAXSTR];
mas01mc@324 393 }
mas01mc@324 394 char *thisTimesFileName = new char[MAXSTR];
mas01mc@324 395 char *thisPowerFileName = new char[MAXSTR];
mas01mc@324 396
mas01mc@324 397 std::set<std::string> s;
mas01mc@324 398
mas01mc@324 399 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01mc@324 400 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01mc@324 401 }
mas01mc@324 402
mas01mc@324 403 do {
mas01mc@324 404 filesIn->getline(thisFile,MAXSTR);
mas01mc@324 405 if(key && key!=inFile) {
mas01mc@324 406 keysIn->getline(thisKey,MAXSTR);
mas01mc@324 407 } else {
mas01mc@324 408 thisKey = thisFile;
mas01mc@324 409 }
mas01mc@324 410 if(usingTimes) {
mas01mc@324 411 timesFile->getline(thisTimesFileName,MAXSTR);
mas01mc@324 412 }
mas01mc@324 413 if(usingPower) {
mas01mc@324 414 powerFile->getline(thisPowerFileName, MAXSTR);
mas01mc@324 415 }
mas01mc@324 416
mas01mc@324 417 if(filesIn->eof()) {
mas01mc@324 418 break;
mas01mc@324 419 }
mas01mc@324 420
mas01mc@324 421 initInputFile(thisFile, false);
mas01mc@324 422
mas01mc@324 423 if(!enough_per_file_space_free()) {
mas01mc@324 424 error("batchinsert failed: no more room for metadata", thisFile);
mas01mc@324 425 }
mas01mc@324 426
mas01mc@324 427 if(s.count(thisKey)) {
mas01mc@324 428 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01mc@324 429 } else {
mas01mc@324 430 s.insert(thisKey);
mas01mc@324 431 // Make a track index table of features to file indexes
mas01mc@324 432 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@324 433 if(!numVectors) {
mas01mc@324 434 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01mc@324 435 }
mas01mc@324 436 else{
mas01mc@324 437 // Check that time-stamp file exists
mas01mc@324 438 if(usingTimes){
mas01mc@324 439 if(timesFile->eof()) {
mas01mc@324 440 error("not enough timestamp files in timesList", timesFileName);
mas01mc@324 441 }
mas01mc@324 442 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01mc@324 443 if(!thisTimesFile->is_open()) {
mas01mc@324 444 error("Cannot open timestamp file", thisTimesFileName);
mas01mc@324 445 }
mas01mc@324 446 if(thisTimesFile)
mas01mc@324 447 delete thisTimesFile;
mas01mc@324 448 }
mas01mc@324 449
mas01mc@324 450 // Check that power file exists
mas01mc@324 451 if (usingPower) {
mas01mc@324 452 if(powerFile->eof()) {
mas01mc@324 453 error("not enough power files in powerList", powerFileName);
mas01mc@324 454 }
mas01mc@324 455 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01mc@324 456 if (thispowerfd < 0) {
mas01mc@324 457 error("failed to open power file", thisPowerFileName);
mas01mc@324 458 }
mas01mc@324 459 if (0 < thispowerfd) {
mas01mc@324 460 close(thispowerfd);
mas01mc@324 461 }
mas01mc@324 462 }
mas01mc@324 463
mas01mc@324 464 // persist links to the feature files for reading from filesystem later
mas01mc@324 465
mas01mc@324 466 // Primary Keys
mas01mc@324 467 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@324 468
mas01mc@324 469 // Feature Vector fileNames
mas01mc@324 470 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
mas01mc@324 471
mas01mc@324 472 // Time Stamp fileNames
mas01mc@324 473 if(usingTimes)
mas01mc@324 474 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
mas01mc@324 475
mas01mc@324 476
mas01mc@324 477 // Power fileNames
mas01mc@324 478 if(usingPower)
mas01mc@324 479 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
mas01mc@324 480
mas01mc@324 481 // Increment file count
mas01mc@324 482 dbH->numFiles++;
mas01mc@324 483
mas01mc@324 484 // Update Header information
mas01mc@324 485 dbH->length+=(statbuf.st_size-sizeof(int));
mas01mc@324 486
mas01mc@324 487 // Update track to file index map
mas01mc@324 488 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 489
mas01mc@324 490 totalVectors+=numVectors;
mas01mc@324 491
mas01mc@324 492 // Copy the header back to the database
mas01mc@324 493 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01mc@324 494 }
mas01mc@324 495 }
mas01mc@324 496 // CLEAN UP
mas01mc@324 497 if(indata)
mas01mc@324 498 munmap(indata,statbuf.st_size);
mas01mc@324 499 if(infid>0)
mas01mc@324 500 close(infid);
mas01mc@324 501 } while(!filesIn->eof());
mas01mc@324 502
mas01mc@324 503 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01mc@324 504
mas01mc@324 505 delete [] thisPowerFileName;
mas01mc@324 506 if(key && (key != inFile)) {
mas01mc@324 507 delete [] thisKey;
mas01mc@324 508 }
mas01mc@324 509 delete [] thisFile;
mas01mc@324 510 delete [] thisTimesFileName;
mas01mc@324 511
mas01mc@324 512 delete filesIn;
mas01mc@324 513 delete keysIn;
mas01mc@324 514
mas01mc@324 515 // Report status
mas01mc@324 516 status(dbName);
mas01mc@324 517 }