annotate insert.cpp @ 369:6564be3109c5 gcc-4.3-cleanups

gcc-4.3 warning cleanups for lshlib.cpp (I do not believe that any of these changes contain significant copyrightable "intellectual property". However, to the extent that they do, the changes are hereby released into the Public Domain, and may therefore be used by anyone for any purpose without need for consideration of any kind.)
author mas01cr
date Wed, 12 Nov 2008 15:23:32 +0000
parents 521812d63516
children 61b40ed4dc62
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@251 3 bool audioDB::enough_per_file_space_free() {
mas01cr@251 4 unsigned int fmaxfiles, tmaxfiles;
mas01cr@251 5 unsigned int maxfiles;
mas01cr@251 6
mas01cr@256 7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
mas01cr@256 8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@251 9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@251 10 return(dbH->numFiles < maxfiles);
mas01cr@251 11 }
mas01cr@251 12
mas01cr@239 13 bool audioDB::enough_data_space_free(off_t size) {
mas01mc@324 14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@239 15 }
mas01cr@239 16
mas01cr@239 17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@366 18 if(lseek(dbfid, dbH->dataOffset + offset, SEEK_SET) == (off_t) -1) {
mas01cr@366 19 error("error seeking to offset", "", "lseek");
mas01cr@366 20 }
mas01cr@366 21 CHECKED_WRITE(dbfid, buffer, size);
mas01cr@239 22 }
mas01cr@239 23
mas01cr@239 24 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@239 25 forWrite = true;
mas01cr@239 26 initTables(dbName, inFile);
mas01cr@239 27
mas01mc@324 28 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@324 29 error("Single-feature inserts not allowed with LARGE audioDB instances");
mas01mc@324 30
mas01cr@239 31 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 32 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 33
mas01cr@239 34 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 35 error("Must use power with power-enabled database", dbName);
mas01cr@239 36
mas01cr@251 37 if(!enough_per_file_space_free()) {
mas01cr@251 38 error("Insert failed: no more room for metadata", inFile);
mas01cr@251 39 }
mas01cr@251 40
mas01cr@239 41 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 42 error("Insert failed: no more room in database", inFile);
mas01cr@239 43 }
mas01cr@239 44
mas01cr@239 45 if(!key)
mas01cr@239 46 key=inFile;
mas01cr@239 47 // Linear scan of filenames check for pre-existing feature
mas01cr@239 48 unsigned alreadyInserted=0;
mas01cr@239 49 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 50 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
mas01cr@239 51 alreadyInserted=1;
mas01cr@239 52 break;
mas01cr@239 53 }
mas01cr@239 54
mas01cr@239 55 if(alreadyInserted) {
mas01cr@239 56 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01mc@324 57 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
mas01cr@239 58 return;
mas01cr@239 59 }
mas01cr@239 60
mas01cr@239 61 // Make a track index table of features to file indexes
mas01cr@239 62 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 63 if(!numVectors) {
mas01cr@239 64 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@239 65
mas01cr@239 66 // CLEAN UP
mas01cr@239 67 munmap(indata,statbuf.st_size);
mas01cr@239 68 munmap(db,dbH->dbSize);
mas01cr@239 69 close(infid);
mas01cr@239 70 return;
mas01cr@239 71 }
mas01cr@239 72
mas01mc@324 73 INSERT_FILETABLE_STRING(fileTable, key);
mas01cr@239 74
mas01cr@239 75 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 76
mas01cr@239 77 // Check times status and insert times from file
mas01cr@239 78 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@239 79 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 80
mas01cr@239 81 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 82 error("out of space for times", key);
mas01cr@239 83 }
mas01cr@239 84
mas01cr@239 85 if (usingTimes) {
mas01cr@239 86 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@239 87 }
mas01cr@239 88
mas01cr@239 89 double *powerdata = powerTable + indexoffset;
mas01cr@239 90 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@239 91
mas01cr@239 92 // Increment file count
mas01cr@239 93 dbH->numFiles++;
mas01cr@239 94
mas01cr@239 95 // Update Header information
mas01cr@239 96 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 97
mas01cr@239 98 // Update track to file index map
mas01cr@239 99 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@239 100
mas01cr@239 101 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 102
mas01cr@239 103 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 104 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 105 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 106
mas01cr@239 107 // Report status
mas01cr@239 108 status(dbName);
mas01cr@239 109 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@239 110
mas01cr@239 111 // Copy the header back to the database
mas01cr@239 112 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 113
mas01cr@239 114 // CLEAN UP
mas01cr@239 115 munmap(indata,statbuf.st_size);
mas01cr@239 116 close(infid);
mas01cr@239 117 }
mas01cr@239 118
mas01cr@239 119 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@239 120 assert(usingTimes);
mas01cr@239 121
mas01cr@239 122 unsigned numtimes = 0;
mas01cr@239 123
mas01cr@239 124 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@239 125 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@239 126 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 127 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@239 128 }
mas01cr@239 129
mas01cr@239 130 if(!timesFile->is_open()) {
mas01cr@239 131 error("problem opening times file on timestamped database", timesFileName);
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 double timepoint, next;
mas01cr@239 135 *timesFile >> timepoint;
mas01cr@239 136 if (timesFile->eof()) {
mas01cr@239 137 error("no entries in times file", timesFileName);
mas01cr@239 138 }
mas01cr@239 139 numtimes++;
mas01cr@239 140 do {
mas01cr@239 141 *timesFile >> next;
mas01cr@239 142 if (timesFile->eof()) {
mas01cr@239 143 break;
mas01cr@239 144 }
mas01cr@239 145 numtimes++;
mas01cr@239 146 timesdata[0] = timepoint;
mas01cr@239 147 timepoint = (timesdata[1] = next);
mas01cr@239 148 timesdata += 2;
mas01cr@239 149 } while (numtimes < numVectors + 1);
mas01cr@239 150
mas01cr@239 151 if (numtimes < numVectors + 1) {
mas01cr@239 152 error("too few timepoints in times file", timesFileName);
mas01cr@239 153 }
mas01cr@239 154
mas01cr@239 155 *timesFile >> next;
mas01cr@239 156 if (!timesFile->eof()) {
mas01cr@239 157 error("too many timepoints in times file", timesFileName);
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160
mas01cr@239 161 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01mc@324 162 if(usingPower){
mas01cr@239 163 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 164 error("Cannot insert power data on non-power DB", dbName);
mas01cr@239 165 }
mas01mc@324 166
mas01cr@239 167 int one;
mas01cr@239 168 unsigned int count;
mas01mc@324 169
mas01cr@239 170 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@239 171 if (count != sizeof(unsigned int)) {
mas01cr@239 172 error("powerfd read failed", "int", "read");
mas01cr@239 173 }
mas01cr@239 174 if (one != 1) {
mas01cr@239 175 error("dimensionality of power file not 1", powerFileName);
mas01cr@239 176 }
mas01mc@324 177
mas01cr@239 178 // FIXME: should check that the powerfile is the right size for
mas01cr@239 179 // this. -- CSR, 2007-10-30
mas01cr@239 180 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@239 181 if (count != numVectors * sizeof(double)) {
mas01cr@239 182 error("powerfd read failed", "double", "read");
mas01cr@239 183 }
mas01cr@239 184 }
mas01cr@239 185 }
mas01cr@239 186
mas01cr@239 187 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@239 188
mas01cr@239 189 forWrite = true;
mas01cr@239 190 initDBHeader(dbName);
mas01cr@239 191
mas01mc@324 192 // Treat large ADB instances differently
mas01mc@324 193 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 194 batchinsert_large_adb(dbName, inFile) ;
mas01mc@324 195 return;
mas01mc@324 196 }
mas01mc@324 197
mas01cr@239 198 if(!key)
mas01cr@239 199 key=inFile;
mas01cr@239 200 std::ifstream *filesIn = 0;
mas01cr@239 201 std::ifstream *keysIn = 0;
mas01cr@239 202 std::ifstream* thisTimesFile = 0;
mas01cr@239 203 int thispowerfd = 0;
mas01cr@239 204
mas01cr@239 205 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@239 206 error("Could not open batch in file", inFile);
mas01cr@239 207 if(key && key!=inFile)
mas01cr@239 208 if(!(keysIn = new std::ifstream(key)))
mas01cr@239 209 error("Could not open batch key file",key);
mas01cr@239 210
mas01cr@239 211 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 212 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 213
mas01cr@239 214 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 215 error("Must use power with power-enabled database", dbName);
mas01cr@239 216
mas01cr@239 217 unsigned totalVectors=0;
mas01cr@239 218 char *thisFile = new char[MAXSTR];
mas01cr@262 219 char *thisKey = 0;
mas01cr@262 220 if (key && (key != inFile)) {
mas01cr@262 221 thisKey = new char[MAXSTR];
mas01cr@262 222 }
mas01cr@239 223 char *thisTimesFileName = new char[MAXSTR];
mas01cr@239 224 char *thisPowerFileName = new char[MAXSTR];
mas01cr@302 225
mas01cr@302 226 std::set<std::string> s;
mas01cr@302 227
mas01cr@302 228 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@302 229 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01cr@302 230 }
mas01cr@302 231
mas01cr@302 232 do {
mas01cr@239 233 filesIn->getline(thisFile,MAXSTR);
mas01cr@262 234 if(key && key!=inFile) {
mas01cr@239 235 keysIn->getline(thisKey,MAXSTR);
mas01cr@262 236 } else {
mas01cr@239 237 thisKey = thisFile;
mas01cr@262 238 }
mas01cr@262 239 if(usingTimes) {
mas01cr@262 240 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@262 241 }
mas01cr@262 242 if(usingPower) {
mas01cr@239 243 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@262 244 }
mas01cr@239 245
mas01cr@262 246 if(filesIn->eof()) {
mas01cr@239 247 break;
mas01cr@262 248 }
mas01cr@239 249 initInputFile(thisFile);
mas01cr@239 250
mas01cr@251 251 if(!enough_per_file_space_free()) {
mas01cr@251 252 error("batchinsert failed: no more room for metadata", thisFile);
mas01cr@251 253 }
mas01cr@251 254
mas01cr@239 255 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 256 error("batchinsert failed: no more room in database", thisFile);
mas01cr@239 257 }
mas01cr@239 258
mas01cr@302 259 if(s.count(thisKey)) {
mas01cr@239 260 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@239 261 } else {
mas01cr@302 262 s.insert(thisKey);
mas01cr@239 263 // Make a track index table of features to file indexes
mas01cr@239 264 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 265 if(!numVectors) {
mas01cr@239 266 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@239 267 }
mas01cr@239 268 else{
mas01cr@239 269 if(usingTimes){
mas01cr@239 270 if(timesFile->eof()) {
mas01cr@239 271 error("not enough timestamp files in timesList", timesFileName);
mas01cr@239 272 }
mas01cr@239 273 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@239 274 if(!thisTimesFile->is_open()) {
mas01cr@239 275 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@239 276 }
mas01cr@239 277 off_t insertoffset = dbH->length;
mas01cr@239 278 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@239 279 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 280 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 281 error("out of space for times", key);
mas01cr@239 282 }
mas01cr@239 283 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@239 284 if(thisTimesFile)
mas01cr@239 285 delete thisTimesFile;
mas01cr@239 286 }
mas01cr@239 287
mas01cr@239 288 if (usingPower) {
mas01cr@239 289 if(powerFile->eof()) {
mas01cr@239 290 error("not enough power files in powerList", powerFileName);
mas01cr@239 291 }
mas01cr@239 292 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@239 293 if (thispowerfd < 0) {
mas01cr@239 294 error("failed to open power file", thisPowerFileName);
mas01cr@239 295 }
mas01cr@239 296 off_t insertoffset = dbH->length;
mas01cr@239 297 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@239 298 double *powerdata = powerTable + poweroffset;
mas01cr@239 299 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@239 300 if (0 < thispowerfd) {
mas01cr@239 301 close(thispowerfd);
mas01cr@239 302 }
mas01cr@239 303 }
mas01mc@324 304
mas01mc@324 305 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@324 306
mas01cr@239 307 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 308
mas01cr@239 309 // Increment file count
mas01cr@239 310 dbH->numFiles++;
mas01cr@239 311
mas01cr@239 312 // Update Header information
mas01cr@239 313 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 314
mas01cr@239 315 // Update track to file index map
mas01cr@239 316 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 317
mas01cr@239 318 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 319
mas01cr@239 320 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 321 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 322 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 323
mas01cr@239 324 totalVectors+=numVectors;
mas01cr@239 325
mas01cr@239 326 // Copy the header back to the database
mas01cr@239 327 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 328 }
mas01cr@239 329 }
mas01cr@239 330 // CLEAN UP
mas01cr@239 331 munmap(indata,statbuf.st_size);
mas01cr@239 332 close(infid);
mas01cr@239 333 } while(!filesIn->eof());
mas01cr@239 334
mas01cr@239 335 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@262 336
mas01cr@262 337 delete [] thisPowerFileName;
mas01cr@262 338 if(key && (key != inFile)) {
mas01cr@262 339 delete [] thisKey;
mas01cr@262 340 }
mas01cr@262 341 delete [] thisFile;
mas01cr@262 342 delete [] thisTimesFileName;
mas01cr@239 343
mas01cr@262 344 delete filesIn;
mas01cr@262 345 delete keysIn;
mas01cr@262 346
mas01cr@239 347 // Report status
mas01cr@239 348 status(dbName);
mas01cr@239 349 }
mas01mc@324 350
mas01mc@324 351
mas01mc@324 352 // BATCHINSERT_LARGE_ADB
mas01mc@324 353 //
mas01mc@324 354 // This method inserts file pointers into the ADB instance rather than the actual feature data
mas01mc@324 355 //
mas01mc@324 356 // This method is intended for databases that are large enough to only support indexed query
mas01mc@324 357 // So exhaustive searching across all feature vectors will not be performed
mas01mc@324 358 //
mas01mc@324 359 // We insert featureFileName, [powerFileName], [timesFileName]
mas01mc@324 360 //
mas01mc@324 361 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
mas01mc@324 362 //
mas01mc@324 363 // LIMITS:
mas01mc@324 364 //
mas01mc@324 365 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
mas01mc@324 366 //
mas01mc@324 367 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
mas01mc@324 368
mas01mc@324 369 if(!key)
mas01mc@324 370 key=inFile;
mas01mc@324 371 std::ifstream *filesIn = 0;
mas01mc@324 372 std::ifstream *keysIn = 0;
mas01mc@324 373 std::ifstream* thisTimesFile = 0;
mas01mc@324 374 int thispowerfd = 0;
mas01mc@324 375
mas01mc@324 376 if(!(filesIn = new std::ifstream(inFile)))
mas01mc@324 377 error("Could not open batch in file", inFile);
mas01mc@324 378 if(key && key!=inFile)
mas01mc@324 379 if(!(keysIn = new std::ifstream(key)))
mas01mc@324 380 error("Could not open batch key file",key);
mas01mc@324 381
mas01mc@324 382 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@324 383 error("Must use timestamps with timestamped database","use --times");
mas01mc@324 384
mas01mc@324 385 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01mc@324 386 error("Must use power with power-enabled database", dbName);
mas01mc@324 387
mas01mc@324 388 unsigned totalVectors=0;
mas01mc@324 389 char *thisFile = new char[MAXSTR];
mas01mc@324 390 char *thisKey = 0;
mas01mc@324 391 if (key && (key != inFile)) {
mas01mc@324 392 thisKey = new char[MAXSTR];
mas01mc@324 393 }
mas01mc@324 394 char *thisTimesFileName = new char[MAXSTR];
mas01mc@324 395 char *thisPowerFileName = new char[MAXSTR];
mas01mc@324 396
mas01mc@324 397 std::set<std::string> s;
mas01mc@324 398
mas01mc@324 399 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01mc@324 400 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01mc@324 401 }
mas01mc@324 402
mas01mc@324 403 do {
mas01mc@324 404 filesIn->getline(thisFile,MAXSTR);
mas01mc@324 405 if(key && key!=inFile) {
mas01mc@324 406 keysIn->getline(thisKey,MAXSTR);
mas01mc@324 407 } else {
mas01mc@324 408 thisKey = thisFile;
mas01mc@324 409 }
mas01mc@324 410 if(usingTimes) {
mas01mc@324 411 timesFile->getline(thisTimesFileName,MAXSTR);
mas01mc@324 412 }
mas01mc@324 413 if(usingPower) {
mas01mc@324 414 powerFile->getline(thisPowerFileName, MAXSTR);
mas01mc@324 415 }
mas01mc@324 416
mas01mc@324 417 if(filesIn->eof()) {
mas01mc@324 418 break;
mas01mc@324 419 }
mas01mc@324 420
mas01mc@324 421 initInputFile(thisFile, false);
mas01mc@324 422
mas01mc@324 423 if(!enough_per_file_space_free()) {
mas01mc@324 424 error("batchinsert failed: no more room for metadata", thisFile);
mas01mc@324 425 }
mas01mc@324 426
mas01mc@324 427 if(s.count(thisKey)) {
mas01mc@324 428 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01mc@324 429 } else {
mas01mc@324 430 s.insert(thisKey);
mas01mc@324 431 // Make a track index table of features to file indexes
mas01mc@324 432 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@324 433 if(!numVectors) {
mas01mc@324 434 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01mc@324 435 }
mas01mc@324 436 else{
mas01mc@324 437 // Check that time-stamp file exists
mas01mc@324 438 if(usingTimes){
mas01mc@324 439 if(timesFile->eof()) {
mas01mc@324 440 error("not enough timestamp files in timesList", timesFileName);
mas01mc@324 441 }
mas01mc@324 442 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01mc@324 443 if(!thisTimesFile->is_open()) {
mas01mc@324 444 error("Cannot open timestamp file", thisTimesFileName);
mas01mc@324 445 }
mas01mc@324 446 if(thisTimesFile)
mas01mc@324 447 delete thisTimesFile;
mas01mc@324 448 }
mas01mc@324 449
mas01mc@324 450 // Check that power file exists
mas01mc@324 451 if (usingPower) {
mas01mc@324 452 if(powerFile->eof()) {
mas01mc@324 453 error("not enough power files in powerList", powerFileName);
mas01mc@324 454 }
mas01mc@324 455 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01mc@324 456 if (thispowerfd < 0) {
mas01mc@324 457 error("failed to open power file", thisPowerFileName);
mas01mc@324 458 }
mas01mc@324 459 if (0 < thispowerfd) {
mas01mc@324 460 close(thispowerfd);
mas01mc@324 461 }
mas01mc@324 462 }
mas01mc@324 463
mas01mc@324 464 // persist links to the feature files for reading from filesystem later
mas01mc@324 465
mas01mc@324 466 // Primary Keys
mas01mc@324 467 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@324 468
mas01mc@324 469 // Feature Vector fileNames
mas01mc@324 470 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
mas01mc@324 471
mas01mc@324 472 // Time Stamp fileNames
mas01mc@324 473 if(usingTimes)
mas01mc@324 474 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
mas01mc@324 475
mas01mc@324 476
mas01mc@324 477 // Power fileNames
mas01mc@324 478 if(usingPower)
mas01mc@324 479 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
mas01mc@324 480
mas01mc@324 481 // Increment file count
mas01mc@324 482 dbH->numFiles++;
mas01mc@324 483
mas01mc@324 484 // Update Header information
mas01mc@324 485 dbH->length+=(statbuf.st_size-sizeof(int));
mas01mc@324 486
mas01mc@324 487 // Update track to file index map
mas01mc@324 488 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 489
mas01mc@324 490 totalVectors+=numVectors;
mas01mc@324 491
mas01mc@324 492 // Copy the header back to the database
mas01mc@324 493 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01mc@324 494 }
mas01mc@324 495 }
mas01mc@324 496 // CLEAN UP
mas01mc@324 497 if(indata)
mas01mc@324 498 munmap(indata,statbuf.st_size);
mas01mc@324 499 if(infid>0)
mas01mc@324 500 close(infid);
mas01mc@324 501 } while(!filesIn->eof());
mas01mc@324 502
mas01mc@324 503 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01mc@324 504
mas01mc@324 505 delete [] thisPowerFileName;
mas01mc@324 506 if(key && (key != inFile)) {
mas01mc@324 507 delete [] thisKey;
mas01mc@324 508 }
mas01mc@324 509 delete [] thisFile;
mas01mc@324 510 delete [] thisTimesFileName;
mas01mc@324 511
mas01mc@324 512 delete filesIn;
mas01mc@324 513 delete keysIn;
mas01mc@324 514
mas01mc@324 515 // Report status
mas01mc@324 516 status(dbName);
mas01mc@324 517 }