annotate insert.cpp @ 512:6439cfba2524 memory-leaks

Implemented correct LSH table via compile-time switch -DLSH_DUMP_CORE_TABLES. Dumps on LSH load.
author mas01mc
date Fri, 23 Jan 2009 18:45:44 +0000
parents 7d6dd067d12e
children a8a5f2ca5380 342822c2d49a
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@251 3 bool audioDB::enough_per_file_space_free() {
mas01cr@251 4 unsigned int fmaxfiles, tmaxfiles;
mas01cr@251 5 unsigned int maxfiles;
mas01cr@251 6
mas01cr@256 7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
mas01cr@256 8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@251 9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@251 10 return(dbH->numFiles < maxfiles);
mas01cr@251 11 }
mas01cr@251 12
mas01cr@239 13 bool audioDB::enough_data_space_free(off_t size) {
mas01mc@324 14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@239 15 }
mas01cr@239 16
mas01cr@239 17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@370 18 if(lseek(dbfid, dbH->dataOffset + offset, SEEK_SET) == (off_t) -1) {
mas01cr@370 19 error("error seeking to offset", "", "lseek");
mas01cr@370 20 }
mas01cr@370 21 CHECKED_WRITE(dbfid, buffer, size);
mas01cr@239 22 }
mas01cr@239 23
mas01cr@239 24 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@239 25 forWrite = true;
mas01cr@239 26 initTables(dbName, inFile);
mas01cr@239 27
mas01mc@324 28 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@324 29 error("Single-feature inserts not allowed with LARGE audioDB instances");
mas01mc@324 30
mas01cr@239 31 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 32 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 33
mas01cr@239 34 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 35 error("Must use power with power-enabled database", dbName);
mas01cr@239 36
mas01cr@251 37 if(!enough_per_file_space_free()) {
mas01cr@251 38 error("Insert failed: no more room for metadata", inFile);
mas01cr@251 39 }
mas01cr@251 40
mas01cr@239 41 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 42 error("Insert failed: no more room in database", inFile);
mas01cr@239 43 }
mas01cr@239 44
mas01cr@239 45 if(!key)
mas01cr@239 46 key=inFile;
mas01cr@239 47 // Linear scan of filenames check for pre-existing feature
mas01cr@239 48 unsigned alreadyInserted=0;
mas01cr@239 49 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 50 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
mas01cr@239 51 alreadyInserted=1;
mas01cr@239 52 break;
mas01cr@239 53 }
mas01cr@239 54
mas01cr@239 55 if(alreadyInserted) {
mas01cr@239 56 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01mc@324 57 // FIXME: Do we need to munmap here (see below) ? MKC 18/08/08
mas01cr@239 58 return;
mas01cr@239 59 }
mas01cr@239 60
mas01cr@239 61 // Make a track index table of features to file indexes
mas01cr@239 62 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 63 if(!numVectors) {
mas01cr@239 64 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@239 65
mas01cr@239 66 // CLEAN UP
mas01cr@239 67 munmap(indata,statbuf.st_size);
mas01cr@239 68 munmap(db,dbH->dbSize);
mas01cr@239 69 close(infid);
mas01cr@239 70 return;
mas01cr@239 71 }
mas01cr@239 72
mas01mc@324 73 INSERT_FILETABLE_STRING(fileTable, key);
mas01cr@239 74
mas01cr@239 75 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 76
mas01cr@239 77 // Check times status and insert times from file
mas01cr@239 78 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@239 79 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 80
mas01cr@239 81 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 82 error("out of space for times", key);
mas01cr@239 83 }
mas01cr@239 84
mas01cr@239 85 if (usingTimes) {
mas01cr@239 86 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@239 87 }
mas01cr@239 88
mas01cr@239 89 double *powerdata = powerTable + indexoffset;
mas01cr@239 90 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@239 91
mas01cr@239 92 // Increment file count
mas01cr@239 93 dbH->numFiles++;
mas01cr@239 94
mas01cr@239 95 // Update Header information
mas01cr@239 96 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 97
mas01cr@239 98 // Update track to file index map
mas01cr@239 99 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@239 100
mas01cr@239 101 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 102
mas01cr@239 103 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 104 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 105 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 106
mas01cr@239 107 // Report status
mas01cr@239 108 status(dbName);
mas01cr@239 109 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@239 110
mas01cr@239 111 // Copy the header back to the database
mas01cr@239 112 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 113
mas01cr@239 114 // CLEAN UP
mas01cr@239 115 munmap(indata,statbuf.st_size);
mas01cr@239 116 close(infid);
mas01cr@239 117 }
mas01cr@239 118
mas01cr@239 119 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@239 120 assert(usingTimes);
mas01cr@239 121
mas01cr@239 122 unsigned numtimes = 0;
mas01cr@239 123
mas01cr@239 124 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@239 125 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@239 126 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 127 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@239 128 }
mas01cr@239 129
mas01cr@239 130 if(!timesFile->is_open()) {
mas01cr@239 131 error("problem opening times file on timestamped database", timesFileName);
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 double timepoint, next;
mas01cr@239 135 *timesFile >> timepoint;
mas01cr@239 136 if (timesFile->eof()) {
mas01cr@239 137 error("no entries in times file", timesFileName);
mas01cr@239 138 }
mas01cr@239 139 numtimes++;
mas01cr@239 140 do {
mas01cr@239 141 *timesFile >> next;
mas01cr@239 142 if (timesFile->eof()) {
mas01cr@239 143 break;
mas01cr@239 144 }
mas01cr@239 145 numtimes++;
mas01cr@239 146 timesdata[0] = timepoint;
mas01cr@239 147 timepoint = (timesdata[1] = next);
mas01cr@239 148 timesdata += 2;
mas01cr@239 149 } while (numtimes < numVectors + 1);
mas01cr@239 150
mas01cr@239 151 if (numtimes < numVectors + 1) {
mas01cr@239 152 error("too few timepoints in times file", timesFileName);
mas01cr@239 153 }
mas01cr@239 154
mas01cr@239 155 *timesFile >> next;
mas01cr@239 156 if (!timesFile->eof()) {
mas01cr@239 157 error("too many timepoints in times file", timesFileName);
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160
mas01cr@239 161 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01mc@324 162 if(usingPower){
mas01cr@239 163 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 164 error("Cannot insert power data on non-power DB", dbName);
mas01cr@239 165 }
mas01mc@324 166
mas01cr@239 167 int one;
mas01cr@239 168 unsigned int count;
mas01mc@324 169
mas01cr@239 170 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@239 171 if (count != sizeof(unsigned int)) {
mas01cr@239 172 error("powerfd read failed", "int", "read");
mas01cr@239 173 }
mas01cr@239 174 if (one != 1) {
mas01cr@239 175 error("dimensionality of power file not 1", powerFileName);
mas01cr@239 176 }
mas01mc@324 177
mas01cr@239 178 // FIXME: should check that the powerfile is the right size for
mas01cr@239 179 // this. -- CSR, 2007-10-30
mas01cr@239 180 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@239 181 if (count != numVectors * sizeof(double)) {
mas01cr@239 182 error("powerfd read failed", "double", "read");
mas01cr@239 183 }
mas01cr@239 184 }
mas01cr@239 185 }
mas01cr@239 186
mas01cr@239 187 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@239 188
mas01cr@239 189 forWrite = true;
mas01cr@239 190 initDBHeader(dbName);
mas01cr@239 191
mas01mc@324 192 // Treat large ADB instances differently
mas01mc@324 193 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 194 batchinsert_large_adb(dbName, inFile) ;
mas01mc@324 195 return;
mas01mc@324 196 }
mas01mc@324 197
mas01cr@239 198 if(!key)
mas01cr@239 199 key=inFile;
mas01cr@239 200 std::ifstream *filesIn = 0;
mas01cr@239 201 std::ifstream *keysIn = 0;
mas01cr@239 202 std::ifstream* thisTimesFile = 0;
mas01cr@239 203 int thispowerfd = 0;
mas01cr@239 204
mas01cr@239 205 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@239 206 error("Could not open batch in file", inFile);
mas01cr@239 207 if(key && key!=inFile)
mas01cr@239 208 if(!(keysIn = new std::ifstream(key)))
mas01cr@239 209 error("Could not open batch key file",key);
mas01cr@239 210
mas01cr@239 211 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 212 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 213
mas01cr@239 214 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 215 error("Must use power with power-enabled database", dbName);
mas01cr@239 216
mas01cr@239 217 unsigned totalVectors=0;
mas01cr@239 218 char *thisFile = new char[MAXSTR];
mas01cr@262 219 char *thisKey = 0;
mas01cr@262 220 if (key && (key != inFile)) {
mas01cr@262 221 thisKey = new char[MAXSTR];
mas01cr@262 222 }
mas01cr@239 223 char *thisTimesFileName = new char[MAXSTR];
mas01cr@239 224 char *thisPowerFileName = new char[MAXSTR];
mas01cr@302 225
mas01cr@302 226 std::set<std::string> s;
mas01cr@302 227
mas01cr@302 228 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@302 229 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01cr@302 230 }
mas01cr@302 231
mas01cr@302 232 do {
mas01cr@239 233 filesIn->getline(thisFile,MAXSTR);
mas01cr@262 234 if(key && key!=inFile) {
mas01cr@239 235 keysIn->getline(thisKey,MAXSTR);
mas01cr@262 236 } else {
mas01cr@239 237 thisKey = thisFile;
mas01cr@262 238 }
mas01cr@262 239 if(usingTimes) {
mas01cr@262 240 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@262 241 }
mas01cr@262 242 if(usingPower) {
mas01cr@239 243 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@262 244 }
mas01cr@239 245
mas01cr@262 246 if(filesIn->eof()) {
mas01cr@239 247 break;
mas01cr@262 248 }
mas01cr@239 249 initInputFile(thisFile);
mas01cr@239 250
mas01cr@251 251 if(!enough_per_file_space_free()) {
mas01cr@251 252 error("batchinsert failed: no more room for metadata", thisFile);
mas01cr@251 253 }
mas01cr@251 254
mas01cr@239 255 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 256 error("batchinsert failed: no more room in database", thisFile);
mas01cr@239 257 }
mas01cr@239 258
mas01cr@302 259 if(s.count(thisKey)) {
mas01cr@239 260 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@239 261 } else {
mas01cr@302 262 s.insert(thisKey);
mas01cr@239 263 // Make a track index table of features to file indexes
mas01cr@239 264 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 265 if(!numVectors) {
mas01cr@239 266 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@239 267 }
mas01cr@239 268 else{
mas01cr@239 269 if(usingTimes){
mas01cr@239 270 if(timesFile->eof()) {
mas01cr@239 271 error("not enough timestamp files in timesList", timesFileName);
mas01cr@239 272 }
mas01cr@239 273 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@239 274 if(!thisTimesFile->is_open()) {
mas01cr@239 275 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@239 276 }
mas01cr@239 277 off_t insertoffset = dbH->length;
mas01cr@239 278 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@239 279 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 280 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 281 error("out of space for times", key);
mas01cr@239 282 }
mas01cr@239 283 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@239 284 if(thisTimesFile)
mas01cr@239 285 delete thisTimesFile;
mas01cr@239 286 }
mas01cr@239 287
mas01cr@239 288 if (usingPower) {
mas01cr@239 289 if(powerFile->eof()) {
mas01cr@239 290 error("not enough power files in powerList", powerFileName);
mas01cr@239 291 }
mas01cr@239 292 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@239 293 if (thispowerfd < 0) {
mas01cr@239 294 error("failed to open power file", thisPowerFileName);
mas01cr@239 295 }
mas01cr@239 296 off_t insertoffset = dbH->length;
mas01cr@239 297 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@239 298 double *powerdata = powerTable + poweroffset;
mas01cr@239 299 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@239 300 if (0 < thispowerfd) {
mas01cr@239 301 close(thispowerfd);
mas01cr@239 302 }
mas01cr@239 303 }
mas01mc@324 304
mas01mc@324 305 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01mc@324 306
mas01cr@239 307 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 308
mas01cr@239 309 // Increment file count
mas01cr@239 310 dbH->numFiles++;
mas01cr@239 311
mas01cr@239 312 // Update Header information
mas01cr@239 313 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 314
mas01cr@239 315 // Update track to file index map
mas01cr@239 316 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 317
mas01cr@239 318 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 319
mas01cr@239 320 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 321 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 322 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 323
mas01cr@239 324 totalVectors+=numVectors;
mas01cr@239 325
mas01cr@239 326 // Copy the header back to the database
mas01cr@239 327 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 328 }
mas01cr@239 329 }
mas01cr@377 330
mas01cr@239 331 // CLEAN UP
mas01cr@239 332 munmap(indata,statbuf.st_size);
mas01cr@377 333 indata = NULL;
mas01cr@239 334 close(infid);
mas01cr@377 335 infid = 0;
mas01cr@239 336 } while(!filesIn->eof());
mas01cr@239 337
mas01cr@239 338 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@262 339
mas01cr@262 340 delete [] thisPowerFileName;
mas01cr@262 341 if(key && (key != inFile)) {
mas01cr@262 342 delete [] thisKey;
mas01cr@262 343 }
mas01cr@262 344 delete [] thisFile;
mas01cr@262 345 delete [] thisTimesFileName;
mas01cr@239 346
mas01cr@262 347 delete filesIn;
mas01cr@262 348 delete keysIn;
mas01cr@262 349
mas01cr@239 350 // Report status
mas01cr@239 351 status(dbName);
mas01cr@239 352 }
mas01mc@324 353
mas01mc@324 354
mas01mc@324 355 // BATCHINSERT_LARGE_ADB
mas01mc@324 356 //
mas01mc@324 357 // This method inserts file pointers into the ADB instance rather than the actual feature data
mas01mc@324 358 //
mas01mc@324 359 // This method is intended for databases that are large enough to only support indexed query
mas01mc@324 360 // So exhaustive searching across all feature vectors will not be performed
mas01mc@324 361 //
mas01mc@324 362 // We insert featureFileName, [powerFileName], [timesFileName]
mas01mc@324 363 //
mas01mc@324 364 // l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
mas01mc@324 365 //
mas01mc@324 366 // LIMITS:
mas01mc@324 367 //
mas01mc@324 368 // We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
mas01mc@324 369 //
mas01mc@324 370 void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
mas01mc@324 371
mas01mc@324 372 if(!key)
mas01mc@324 373 key=inFile;
mas01mc@324 374 std::ifstream *filesIn = 0;
mas01mc@324 375 std::ifstream *keysIn = 0;
mas01mc@324 376 std::ifstream* thisTimesFile = 0;
mas01mc@324 377 int thispowerfd = 0;
mas01mc@324 378
mas01mc@324 379 if(!(filesIn = new std::ifstream(inFile)))
mas01mc@324 380 error("Could not open batch in file", inFile);
mas01mc@324 381 if(key && key!=inFile)
mas01mc@324 382 if(!(keysIn = new std::ifstream(key)))
mas01mc@324 383 error("Could not open batch key file",key);
mas01mc@324 384
mas01mc@324 385 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@324 386 error("Must use timestamps with timestamped database","use --times");
mas01mc@324 387
mas01mc@324 388 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01mc@324 389 error("Must use power with power-enabled database", dbName);
mas01mc@324 390
mas01cr@380 391 char *cwd = new char[PATH_MAX];
mas01cr@380 392
mas01cr@380 393 if ((getcwd(cwd, PATH_MAX)) == 0) {
mas01cr@380 394 error("error getting working directory", "", "getcwd");
mas01cr@380 395 }
mas01cr@380 396
mas01mc@324 397 unsigned totalVectors=0;
mas01mc@324 398 char *thisFile = new char[MAXSTR];
mas01mc@324 399 char *thisKey = 0;
mas01mc@324 400 if (key && (key != inFile)) {
mas01mc@324 401 thisKey = new char[MAXSTR];
mas01mc@324 402 }
mas01mc@324 403 char *thisTimesFileName = new char[MAXSTR];
mas01mc@324 404 char *thisPowerFileName = new char[MAXSTR];
mas01mc@324 405
mas01mc@324 406 std::set<std::string> s;
mas01mc@324 407
mas01mc@324 408 for (unsigned k = 0; k < dbH->numFiles; k++) {
mas01mc@324 409 s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01mc@324 410 }
mas01mc@324 411
mas01mc@324 412 do {
mas01mc@324 413 filesIn->getline(thisFile,MAXSTR);
mas01mc@324 414 if(key && key!=inFile) {
mas01mc@324 415 keysIn->getline(thisKey,MAXSTR);
mas01mc@324 416 } else {
mas01mc@324 417 thisKey = thisFile;
mas01mc@324 418 }
mas01mc@324 419 if(usingTimes) {
mas01mc@324 420 timesFile->getline(thisTimesFileName,MAXSTR);
mas01mc@324 421 }
mas01mc@324 422 if(usingPower) {
mas01mc@324 423 powerFile->getline(thisPowerFileName, MAXSTR);
mas01mc@324 424 }
mas01mc@324 425
mas01mc@324 426 if(filesIn->eof()) {
mas01mc@324 427 break;
mas01mc@324 428 }
mas01mc@324 429
mas01mc@324 430 initInputFile(thisFile, false);
mas01mc@324 431
mas01mc@324 432 if(!enough_per_file_space_free()) {
mas01mc@324 433 error("batchinsert failed: no more room for metadata", thisFile);
mas01mc@324 434 }
mas01mc@324 435
mas01mc@324 436 if(s.count(thisKey)) {
mas01mc@324 437 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01mc@324 438 } else {
mas01mc@324 439 s.insert(thisKey);
mas01mc@324 440 // Make a track index table of features to file indexes
mas01mc@324 441 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@324 442 if(!numVectors) {
mas01mc@324 443 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01mc@324 444 }
mas01mc@324 445 else{
mas01mc@324 446 // Check that time-stamp file exists
mas01mc@324 447 if(usingTimes){
mas01mc@324 448 if(timesFile->eof()) {
mas01mc@324 449 error("not enough timestamp files in timesList", timesFileName);
mas01mc@324 450 }
mas01mc@324 451 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01mc@324 452 if(!thisTimesFile->is_open()) {
mas01mc@324 453 error("Cannot open timestamp file", thisTimesFileName);
mas01mc@324 454 }
mas01mc@324 455 if(thisTimesFile)
mas01mc@324 456 delete thisTimesFile;
mas01mc@324 457 }
mas01mc@324 458
mas01mc@324 459 // Check that power file exists
mas01mc@324 460 if (usingPower) {
mas01mc@324 461 if(powerFile->eof()) {
mas01mc@324 462 error("not enough power files in powerList", powerFileName);
mas01mc@324 463 }
mas01mc@324 464 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01mc@324 465 if (thispowerfd < 0) {
mas01mc@324 466 error("failed to open power file", thisPowerFileName);
mas01mc@324 467 }
mas01mc@324 468 if (0 < thispowerfd) {
mas01mc@324 469 close(thispowerfd);
mas01mc@324 470 }
mas01mc@324 471 }
mas01mc@324 472
mas01mc@324 473 // persist links to the feature files for reading from filesystem later
mas01mc@324 474
mas01mc@324 475 // Primary Keys
mas01mc@324 476 INSERT_FILETABLE_STRING(fileTable, thisKey);
mas01cr@380 477
mas01cr@380 478 if(*thisFile != '/') {
mas01cr@380 479 /* FIXME: MAXSTR and O2_FILETABLE_ENTRY_SIZE should probably
mas01cr@380 480 be the same thing. Also, both are related to PATH_MAX,
mas01cr@380 481 which admittedly is not always defined or a
mas01cr@380 482 constant... */
mas01cr@380 483 char tmp[MAXSTR];
mas01cr@380 484 strncpy(tmp, thisFile, MAXSTR);
mas01cr@380 485 snprintf(thisFile, MAXSTR, "%s/%s", cwd, tmp);
mas01cr@380 486 }
mas01mc@324 487 // Feature Vector fileNames
mas01mc@324 488 INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
mas01mc@324 489
mas01mc@324 490 // Time Stamp fileNames
mas01cr@380 491 if(usingTimes) {
mas01cr@380 492 if(*thisTimesFileName != '/') {
mas01cr@380 493 char tmp[MAXSTR];
mas01cr@380 494 strncpy(tmp, thisTimesFileName, MAXSTR);
mas01cr@380 495 snprintf(thisTimesFileName, MAXSTR, "%s/%s", cwd, tmp);
mas01cr@380 496 }
mas01mc@324 497 INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
mas01cr@380 498 }
mas01mc@324 499
mas01mc@324 500 // Power fileNames
mas01cr@380 501 if(usingPower) {
mas01cr@380 502 if(*thisPowerFileName != '/') {
mas01cr@380 503 char tmp[MAXSTR];
mas01cr@380 504 strncpy(tmp, thisPowerFileName, MAXSTR);
mas01cr@380 505 snprintf(thisPowerFileName, MAXSTR, "%s/%s", cwd, tmp);
mas01cr@380 506 }
mas01mc@324 507 INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
mas01cr@380 508 }
mas01mc@324 509
mas01mc@324 510 // Increment file count
mas01mc@324 511 dbH->numFiles++;
mas01mc@324 512
mas01mc@324 513 // Update Header information
mas01mc@324 514 dbH->length+=(statbuf.st_size-sizeof(int));
mas01mc@324 515
mas01mc@324 516 // Update track to file index map
mas01mc@324 517 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01mc@324 518
mas01mc@324 519 totalVectors+=numVectors;
mas01mc@324 520
mas01mc@324 521 // Copy the header back to the database
mas01mc@324 522 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01mc@324 523 }
mas01mc@324 524 }
mas01mc@324 525 // CLEAN UP
mas01mc@324 526 if(indata)
mas01mc@324 527 munmap(indata,statbuf.st_size);
mas01mc@324 528 if(infid>0)
mas01mc@324 529 close(infid);
mas01mc@324 530 } while(!filesIn->eof());
mas01mc@324 531
mas01mc@324 532 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01mc@324 533
mas01mc@324 534 delete [] thisPowerFileName;
mas01mc@324 535 if(key && (key != inFile)) {
mas01mc@324 536 delete [] thisKey;
mas01mc@324 537 }
mas01mc@324 538 delete [] thisFile;
mas01mc@324 539 delete [] thisTimesFileName;
mas01mc@324 540
mas01mc@324 541 delete filesIn;
mas01mc@324 542 delete keysIn;
mas01mc@324 543
mas01mc@324 544 // Report status
mas01mc@324 545 status(dbName);
mas01mc@324 546 }