annotate insert.cpp @ 277:abfb26e08d9c audiodb-debian

Merge trunk changes -r326:386 into audiodb-debian branch. Plus new debian/changelog version. (Should have used an epoch really, but couldn't be bothered; TODO: work out a sane version numbering policy).
author mas01cr
date Tue, 01 Jul 2008 09:12:40 +0000
parents 15b8ff55ea5b
children
rev   line source
mas01cr@243 1 #include "audioDB.h"
mas01cr@243 2
mas01cr@277 3 bool audioDB::enough_per_file_space_free() {
mas01cr@277 4 unsigned int fmaxfiles, tmaxfiles;
mas01cr@277 5 unsigned int maxfiles;
mas01cr@277 6
mas01cr@277 7 fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
mas01cr@277 8 tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@277 9 maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@277 10 return(dbH->numFiles < maxfiles);
mas01cr@277 11 }
mas01cr@277 12
mas01cr@243 13 bool audioDB::enough_data_space_free(off_t size) {
mas01cr@243 14 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@243 15 }
mas01cr@243 16
mas01cr@243 17 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@243 18 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
mas01cr@243 19 write(dbfid, buffer, size);
mas01cr@243 20 }
mas01cr@243 21
mas01cr@243 22 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@243 23 forWrite = true;
mas01cr@243 24 initTables(dbName, inFile);
mas01cr@243 25
mas01cr@243 26 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@243 27 error("Must use timestamps with timestamped database","use --times");
mas01cr@243 28
mas01cr@243 29 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@243 30 error("Must use power with power-enabled database", dbName);
mas01cr@243 31
mas01cr@277 32 if(!enough_per_file_space_free()) {
mas01cr@277 33 error("Insert failed: no more room for metadata", inFile);
mas01cr@277 34 }
mas01cr@277 35
mas01cr@243 36 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@243 37 error("Insert failed: no more room in database", inFile);
mas01cr@243 38 }
mas01cr@243 39
mas01cr@243 40 if(!key)
mas01cr@243 41 key=inFile;
mas01cr@243 42 // Linear scan of filenames check for pre-existing feature
mas01cr@243 43 unsigned alreadyInserted=0;
mas01cr@243 44 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@277 45 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
mas01cr@243 46 alreadyInserted=1;
mas01cr@243 47 break;
mas01cr@243 48 }
mas01cr@243 49
mas01cr@243 50 if(alreadyInserted) {
mas01cr@243 51 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01cr@243 52 return;
mas01cr@243 53 }
mas01cr@243 54
mas01cr@243 55 // Make a track index table of features to file indexes
mas01cr@243 56 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@243 57 if(!numVectors) {
mas01cr@243 58 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@243 59
mas01cr@243 60 // CLEAN UP
mas01cr@243 61 munmap(indata,statbuf.st_size);
mas01cr@243 62 munmap(db,dbH->dbSize);
mas01cr@243 63 close(infid);
mas01cr@243 64 return;
mas01cr@243 65 }
mas01cr@243 66
mas01cr@277 67 strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, key, strlen(key));
mas01cr@243 68
mas01cr@243 69 off_t insertoffset = dbH->length;// Store current state
mas01cr@243 70
mas01cr@243 71 // Check times status and insert times from file
mas01cr@243 72 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@243 73 double *timesdata = timesTable + 2*indexoffset;
mas01cr@243 74
mas01cr@243 75 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@243 76 error("out of space for times", key);
mas01cr@243 77 }
mas01cr@243 78
mas01cr@243 79 if (usingTimes) {
mas01cr@243 80 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@243 81 }
mas01cr@243 82
mas01cr@243 83 double *powerdata = powerTable + indexoffset;
mas01cr@243 84 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@243 85
mas01cr@243 86 // Increment file count
mas01cr@243 87 dbH->numFiles++;
mas01cr@243 88
mas01cr@243 89 // Update Header information
mas01cr@243 90 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@243 91
mas01cr@243 92 // Update track to file index map
mas01cr@243 93 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@243 94
mas01cr@243 95 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@243 96
mas01cr@243 97 // Norm the vectors on input if the database is already L2 normed
mas01cr@243 98 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@243 99 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@243 100
mas01cr@243 101 // Report status
mas01cr@243 102 status(dbName);
mas01cr@243 103 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@243 104
mas01cr@243 105 // Copy the header back to the database
mas01cr@243 106 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@243 107
mas01cr@243 108 // CLEAN UP
mas01cr@243 109 munmap(indata,statbuf.st_size);
mas01cr@243 110 close(infid);
mas01cr@243 111 }
mas01cr@243 112
mas01cr@243 113 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@243 114 assert(usingTimes);
mas01cr@243 115
mas01cr@243 116 unsigned numtimes = 0;
mas01cr@243 117
mas01cr@243 118 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@243 119 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@243 120 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@243 121 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@243 122 }
mas01cr@243 123
mas01cr@243 124 if(!timesFile->is_open()) {
mas01cr@243 125 error("problem opening times file on timestamped database", timesFileName);
mas01cr@243 126 }
mas01cr@243 127
mas01cr@243 128 double timepoint, next;
mas01cr@243 129 *timesFile >> timepoint;
mas01cr@243 130 if (timesFile->eof()) {
mas01cr@243 131 error("no entries in times file", timesFileName);
mas01cr@243 132 }
mas01cr@243 133 numtimes++;
mas01cr@243 134 do {
mas01cr@243 135 *timesFile >> next;
mas01cr@243 136 if (timesFile->eof()) {
mas01cr@243 137 break;
mas01cr@243 138 }
mas01cr@243 139 numtimes++;
mas01cr@243 140 timesdata[0] = timepoint;
mas01cr@243 141 timepoint = (timesdata[1] = next);
mas01cr@243 142 timesdata += 2;
mas01cr@243 143 } while (numtimes < numVectors + 1);
mas01cr@243 144
mas01cr@243 145 if (numtimes < numVectors + 1) {
mas01cr@243 146 error("too few timepoints in times file", timesFileName);
mas01cr@243 147 }
mas01cr@243 148
mas01cr@243 149 *timesFile >> next;
mas01cr@243 150 if (!timesFile->eof()) {
mas01cr@243 151 error("too many timepoints in times file", timesFileName);
mas01cr@243 152 }
mas01cr@243 153 }
mas01cr@243 154
mas01cr@243 155 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01cr@243 156 if (usingPower) {
mas01cr@243 157 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@243 158 error("Cannot insert power data on non-power DB", dbName);
mas01cr@243 159 }
mas01cr@243 160
mas01cr@243 161 int one;
mas01cr@243 162 unsigned int count;
mas01cr@243 163
mas01cr@243 164 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@243 165 if (count != sizeof(unsigned int)) {
mas01cr@243 166 error("powerfd read failed", "int", "read");
mas01cr@243 167 }
mas01cr@243 168 if (one != 1) {
mas01cr@243 169 error("dimensionality of power file not 1", powerFileName);
mas01cr@243 170 }
mas01cr@243 171
mas01cr@243 172 // FIXME: should check that the powerfile is the right size for
mas01cr@243 173 // this. -- CSR, 2007-10-30
mas01cr@243 174 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@243 175 if (count != numVectors * sizeof(double)) {
mas01cr@243 176 error("powerfd read failed", "double", "read");
mas01cr@243 177 }
mas01cr@243 178 }
mas01cr@243 179 }
mas01cr@243 180
mas01cr@243 181 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@243 182
mas01cr@243 183 forWrite = true;
mas01cr@243 184 initDBHeader(dbName);
mas01cr@243 185
mas01cr@243 186 if(!key)
mas01cr@243 187 key=inFile;
mas01cr@243 188 std::ifstream *filesIn = 0;
mas01cr@243 189 std::ifstream *keysIn = 0;
mas01cr@243 190 std::ifstream* thisTimesFile = 0;
mas01cr@243 191 int thispowerfd = 0;
mas01cr@243 192
mas01cr@243 193 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@243 194 error("Could not open batch in file", inFile);
mas01cr@243 195 if(key && key!=inFile)
mas01cr@243 196 if(!(keysIn = new std::ifstream(key)))
mas01cr@243 197 error("Could not open batch key file",key);
mas01cr@243 198
mas01cr@243 199 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@243 200 error("Must use timestamps with timestamped database","use --times");
mas01cr@243 201
mas01cr@243 202 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@243 203 error("Must use power with power-enabled database", dbName);
mas01cr@243 204
mas01cr@243 205 unsigned totalVectors=0;
mas01cr@243 206 char *thisFile = new char[MAXSTR];
mas01cr@277 207 char *thisKey = 0;
mas01cr@277 208 if (key && (key != inFile)) {
mas01cr@277 209 thisKey = new char[MAXSTR];
mas01cr@277 210 }
mas01cr@243 211 char *thisTimesFileName = new char[MAXSTR];
mas01cr@243 212 char *thisPowerFileName = new char[MAXSTR];
mas01cr@243 213
mas01cr@243 214 do{
mas01cr@243 215 filesIn->getline(thisFile,MAXSTR);
mas01cr@277 216 if(key && key!=inFile) {
mas01cr@243 217 keysIn->getline(thisKey,MAXSTR);
mas01cr@277 218 } else {
mas01cr@243 219 thisKey = thisFile;
mas01cr@277 220 }
mas01cr@277 221 if(usingTimes) {
mas01cr@277 222 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@277 223 }
mas01cr@277 224 if(usingPower) {
mas01cr@243 225 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@277 226 }
mas01cr@243 227
mas01cr@277 228 if(filesIn->eof()) {
mas01cr@243 229 break;
mas01cr@277 230 }
mas01cr@277 231 initInputFile(thisFile);
mas01cr@243 232
mas01cr@277 233 if(!enough_per_file_space_free()) {
mas01cr@277 234 error("batchinsert failed: no more room for metadata", thisFile);
mas01cr@277 235 }
mas01cr@243 236
mas01cr@243 237 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@243 238 error("batchinsert failed: no more room in database", thisFile);
mas01cr@243 239 }
mas01cr@243 240
mas01cr@243 241 // Linear scan of filenames check for pre-existing feature
mas01cr@243 242 unsigned alreadyInserted=0;
mas01cr@243 243
mas01cr@243 244 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@277 245 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)+1)==0){
mas01cr@243 246 alreadyInserted=1;
mas01cr@243 247 break;
mas01cr@243 248 }
mas01cr@243 249
mas01cr@243 250 if(alreadyInserted) {
mas01cr@243 251 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@243 252 } else {
mas01cr@243 253 // Make a track index table of features to file indexes
mas01cr@243 254 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@243 255 if(!numVectors) {
mas01cr@243 256 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@243 257 }
mas01cr@243 258 else{
mas01cr@243 259 if(usingTimes){
mas01cr@243 260 if(timesFile->eof()) {
mas01cr@243 261 error("not enough timestamp files in timesList", timesFileName);
mas01cr@243 262 }
mas01cr@243 263 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@243 264 if(!thisTimesFile->is_open()) {
mas01cr@243 265 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@243 266 }
mas01cr@243 267 off_t insertoffset = dbH->length;
mas01cr@243 268 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@243 269 double *timesdata = timesTable + 2*indexoffset;
mas01cr@243 270 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@243 271 error("out of space for times", key);
mas01cr@243 272 }
mas01cr@243 273 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@243 274 if(thisTimesFile)
mas01cr@243 275 delete thisTimesFile;
mas01cr@243 276 }
mas01cr@243 277
mas01cr@243 278 if (usingPower) {
mas01cr@243 279 if(powerFile->eof()) {
mas01cr@243 280 error("not enough power files in powerList", powerFileName);
mas01cr@243 281 }
mas01cr@243 282 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@243 283 if (thispowerfd < 0) {
mas01cr@243 284 error("failed to open power file", thisPowerFileName);
mas01cr@243 285 }
mas01cr@243 286 off_t insertoffset = dbH->length;
mas01cr@243 287 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@243 288 double *powerdata = powerTable + poweroffset;
mas01cr@243 289 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@243 290 if (0 < thispowerfd) {
mas01cr@243 291 close(thispowerfd);
mas01cr@243 292 }
mas01cr@243 293 }
mas01cr@277 294 strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey));
mas01cr@243 295
mas01cr@243 296 off_t insertoffset = dbH->length;// Store current state
mas01cr@243 297
mas01cr@243 298 // Increment file count
mas01cr@243 299 dbH->numFiles++;
mas01cr@243 300
mas01cr@243 301 // Update Header information
mas01cr@243 302 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@243 303
mas01cr@243 304 // Update track to file index map
mas01cr@243 305 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@243 306
mas01cr@243 307 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@243 308
mas01cr@243 309 // Norm the vectors on input if the database is already L2 normed
mas01cr@243 310 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@243 311 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@243 312
mas01cr@243 313 totalVectors+=numVectors;
mas01cr@243 314
mas01cr@243 315 // Copy the header back to the database
mas01cr@243 316 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@243 317 }
mas01cr@243 318 }
mas01cr@243 319 // CLEAN UP
mas01cr@243 320 munmap(indata,statbuf.st_size);
mas01cr@243 321 close(infid);
mas01cr@243 322 } while(!filesIn->eof());
mas01cr@243 323
mas01cr@243 324 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@277 325
mas01cr@277 326 delete [] thisPowerFileName;
mas01cr@277 327 if(key && (key != inFile)) {
mas01cr@277 328 delete [] thisKey;
mas01cr@277 329 }
mas01cr@277 330 delete [] thisFile;
mas01cr@277 331 delete [] thisTimesFileName;
mas01cr@243 332
mas01cr@277 333 delete filesIn;
mas01cr@277 334 delete keysIn;
mas01cr@277 335
mas01cr@243 336 // Report status
mas01cr@243 337 status(dbName);
mas01cr@243 338 }