annotate insert.cpp @ 249:1da9a9ed55a3

Slightly refactored the new trackSequenceQueryNNReporter so that it is a derived class of trackAveragingReporter. This reduces code duplication significantly. The reporter is still accessed via the nsequence QUERY directive from the command line.
author mas01mc
date Sun, 17 Feb 2008 16:39:57 +0000
parents 2cc06e5b05a5
children a6c9a1c68646 abfb26e08d9c
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@239 3 bool audioDB::enough_data_space_free(off_t size) {
mas01cr@239 4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@239 5 }
mas01cr@239 6
mas01cr@239 7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@239 8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
mas01cr@239 9 write(dbfid, buffer, size);
mas01cr@239 10 }
mas01cr@239 11
mas01cr@239 12 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@239 13 forWrite = true;
mas01cr@239 14 initTables(dbName, inFile);
mas01cr@239 15
mas01cr@239 16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 17 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 18
mas01cr@239 19 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 20 error("Must use power with power-enabled database", dbName);
mas01cr@239 21
mas01cr@239 22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 23 error("Insert failed: no more room in database", inFile);
mas01cr@239 24 }
mas01cr@239 25
mas01cr@239 26 if(!key)
mas01cr@239 27 key=inFile;
mas01cr@239 28 // Linear scan of filenames check for pre-existing feature
mas01cr@239 29 unsigned alreadyInserted=0;
mas01cr@239 30 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@239 31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
mas01cr@239 32 alreadyInserted=1;
mas01cr@239 33 break;
mas01cr@239 34 }
mas01cr@239 35
mas01cr@239 36 if(alreadyInserted) {
mas01cr@239 37 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
mas01cr@239 38 return;
mas01cr@239 39 }
mas01cr@239 40
mas01cr@239 41 // Make a track index table of features to file indexes
mas01cr@239 42 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 43 if(!numVectors) {
mas01cr@239 44 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
mas01cr@239 45
mas01cr@239 46 // CLEAN UP
mas01cr@239 47 munmap(indata,statbuf.st_size);
mas01cr@239 48 munmap(db,dbH->dbSize);
mas01cr@239 49 close(infid);
mas01cr@239 50 return;
mas01cr@239 51 }
mas01cr@239 52
mas01cr@239 53 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@239 54
mas01cr@239 55 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 56
mas01cr@239 57 // Check times status and insert times from file
mas01cr@239 58 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@239 59 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 60
mas01cr@239 61 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 62 error("out of space for times", key);
mas01cr@239 63 }
mas01cr@239 64
mas01cr@239 65 if (usingTimes) {
mas01cr@239 66 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@239 67 }
mas01cr@239 68
mas01cr@239 69 double *powerdata = powerTable + indexoffset;
mas01cr@239 70 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@239 71
mas01cr@239 72 // Increment file count
mas01cr@239 73 dbH->numFiles++;
mas01cr@239 74
mas01cr@239 75 // Update Header information
mas01cr@239 76 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 77
mas01cr@239 78 // Update track to file index map
mas01cr@239 79 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@239 80
mas01cr@239 81 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 82
mas01cr@239 83 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 84 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 85 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 86
mas01cr@239 87 // Report status
mas01cr@239 88 status(dbName);
mas01cr@239 89 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
mas01cr@239 90
mas01cr@239 91 // Copy the header back to the database
mas01cr@239 92 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 93
mas01cr@239 94 // CLEAN UP
mas01cr@239 95 munmap(indata,statbuf.st_size);
mas01cr@239 96 close(infid);
mas01cr@239 97 }
mas01cr@239 98
mas01cr@239 99 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@239 100 assert(usingTimes);
mas01cr@239 101
mas01cr@239 102 unsigned numtimes = 0;
mas01cr@239 103
mas01cr@239 104 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@239 105 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@239 106 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 107 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@239 108 }
mas01cr@239 109
mas01cr@239 110 if(!timesFile->is_open()) {
mas01cr@239 111 error("problem opening times file on timestamped database", timesFileName);
mas01cr@239 112 }
mas01cr@239 113
mas01cr@239 114 double timepoint, next;
mas01cr@239 115 *timesFile >> timepoint;
mas01cr@239 116 if (timesFile->eof()) {
mas01cr@239 117 error("no entries in times file", timesFileName);
mas01cr@239 118 }
mas01cr@239 119 numtimes++;
mas01cr@239 120 do {
mas01cr@239 121 *timesFile >> next;
mas01cr@239 122 if (timesFile->eof()) {
mas01cr@239 123 break;
mas01cr@239 124 }
mas01cr@239 125 numtimes++;
mas01cr@239 126 timesdata[0] = timepoint;
mas01cr@239 127 timepoint = (timesdata[1] = next);
mas01cr@239 128 timesdata += 2;
mas01cr@239 129 } while (numtimes < numVectors + 1);
mas01cr@239 130
mas01cr@239 131 if (numtimes < numVectors + 1) {
mas01cr@239 132 error("too few timepoints in times file", timesFileName);
mas01cr@239 133 }
mas01cr@239 134
mas01cr@239 135 *timesFile >> next;
mas01cr@239 136 if (!timesFile->eof()) {
mas01cr@239 137 error("too many timepoints in times file", timesFileName);
mas01cr@239 138 }
mas01cr@239 139 }
mas01cr@239 140
mas01cr@239 141 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01cr@239 142 if (usingPower) {
mas01cr@239 143 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 144 error("Cannot insert power data on non-power DB", dbName);
mas01cr@239 145 }
mas01cr@239 146
mas01cr@239 147 int one;
mas01cr@239 148 unsigned int count;
mas01cr@239 149
mas01cr@239 150 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@239 151 if (count != sizeof(unsigned int)) {
mas01cr@239 152 error("powerfd read failed", "int", "read");
mas01cr@239 153 }
mas01cr@239 154 if (one != 1) {
mas01cr@239 155 error("dimensionality of power file not 1", powerFileName);
mas01cr@239 156 }
mas01cr@239 157
mas01cr@239 158 // FIXME: should check that the powerfile is the right size for
mas01cr@239 159 // this. -- CSR, 2007-10-30
mas01cr@239 160 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@239 161 if (count != numVectors * sizeof(double)) {
mas01cr@239 162 error("powerfd read failed", "double", "read");
mas01cr@239 163 }
mas01cr@239 164 }
mas01cr@239 165 }
mas01cr@239 166
mas01cr@239 167 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@239 168
mas01cr@239 169 forWrite = true;
mas01cr@239 170 initDBHeader(dbName);
mas01cr@239 171
mas01cr@239 172 if(!key)
mas01cr@239 173 key=inFile;
mas01cr@239 174 std::ifstream *filesIn = 0;
mas01cr@239 175 std::ifstream *keysIn = 0;
mas01cr@239 176 std::ifstream* thisTimesFile = 0;
mas01cr@239 177 int thispowerfd = 0;
mas01cr@239 178
mas01cr@239 179 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@239 180 error("Could not open batch in file", inFile);
mas01cr@239 181 if(key && key!=inFile)
mas01cr@239 182 if(!(keysIn = new std::ifstream(key)))
mas01cr@239 183 error("Could not open batch key file",key);
mas01cr@239 184
mas01cr@239 185 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@239 186 error("Must use timestamps with timestamped database","use --times");
mas01cr@239 187
mas01cr@239 188 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@239 189 error("Must use power with power-enabled database", dbName);
mas01cr@239 190
mas01cr@239 191 unsigned totalVectors=0;
mas01cr@239 192 char *thisKey = new char[MAXSTR];
mas01cr@239 193 char *thisFile = new char[MAXSTR];
mas01cr@239 194 char *thisTimesFileName = new char[MAXSTR];
mas01cr@239 195 char *thisPowerFileName = new char[MAXSTR];
mas01cr@239 196
mas01cr@239 197 do{
mas01cr@239 198 filesIn->getline(thisFile,MAXSTR);
mas01cr@239 199 if(key && key!=inFile)
mas01cr@239 200 keysIn->getline(thisKey,MAXSTR);
mas01cr@239 201 else
mas01cr@239 202 thisKey = thisFile;
mas01cr@239 203 if(usingTimes)
mas01cr@239 204 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@239 205 if(usingPower)
mas01cr@239 206 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@239 207
mas01cr@239 208 if(filesIn->eof())
mas01cr@239 209 break;
mas01cr@239 210
mas01cr@239 211 initInputFile(thisFile);
mas01cr@239 212
mas01cr@239 213 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@239 214 error("batchinsert failed: no more room in database", thisFile);
mas01cr@239 215 }
mas01cr@239 216
mas01cr@239 217 // Linear scan of filenames check for pre-existing feature
mas01cr@239 218 unsigned alreadyInserted=0;
mas01cr@239 219
mas01cr@239 220 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@239 221 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
mas01cr@239 222 alreadyInserted=1;
mas01cr@239 223 break;
mas01cr@239 224 }
mas01cr@239 225
mas01cr@239 226 if(alreadyInserted) {
mas01cr@239 227 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
mas01cr@239 228 } else {
mas01cr@239 229 // Make a track index table of features to file indexes
mas01cr@239 230 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@239 231 if(!numVectors) {
mas01cr@239 232 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
mas01cr@239 233 }
mas01cr@239 234 else{
mas01cr@239 235 if(usingTimes){
mas01cr@239 236 if(timesFile->eof()) {
mas01cr@239 237 error("not enough timestamp files in timesList", timesFileName);
mas01cr@239 238 }
mas01cr@239 239 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@239 240 if(!thisTimesFile->is_open()) {
mas01cr@239 241 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@239 242 }
mas01cr@239 243 off_t insertoffset = dbH->length;
mas01cr@239 244 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@239 245 double *timesdata = timesTable + 2*indexoffset;
mas01cr@239 246 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@239 247 error("out of space for times", key);
mas01cr@239 248 }
mas01cr@239 249 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@239 250 if(thisTimesFile)
mas01cr@239 251 delete thisTimesFile;
mas01cr@239 252 }
mas01cr@239 253
mas01cr@239 254 if (usingPower) {
mas01cr@239 255 if(powerFile->eof()) {
mas01cr@239 256 error("not enough power files in powerList", powerFileName);
mas01cr@239 257 }
mas01cr@239 258 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@239 259 if (thispowerfd < 0) {
mas01cr@239 260 error("failed to open power file", thisPowerFileName);
mas01cr@239 261 }
mas01cr@239 262 off_t insertoffset = dbH->length;
mas01cr@239 263 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@239 264 double *powerdata = powerTable + poweroffset;
mas01cr@239 265 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@239 266 if (0 < thispowerfd) {
mas01cr@239 267 close(thispowerfd);
mas01cr@239 268 }
mas01cr@239 269 }
mas01cr@239 270 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@239 271
mas01cr@239 272 off_t insertoffset = dbH->length;// Store current state
mas01cr@239 273
mas01cr@239 274 // Increment file count
mas01cr@239 275 dbH->numFiles++;
mas01cr@239 276
mas01cr@239 277 // Update Header information
mas01cr@239 278 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@239 279
mas01cr@239 280 // Update track to file index map
mas01cr@239 281 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@239 282
mas01cr@239 283 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@239 284
mas01cr@239 285 // Norm the vectors on input if the database is already L2 normed
mas01cr@239 286 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@239 287 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@239 288
mas01cr@239 289 totalVectors+=numVectors;
mas01cr@239 290
mas01cr@239 291 // Copy the header back to the database
mas01cr@239 292 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@239 293 }
mas01cr@239 294 }
mas01cr@239 295 // CLEAN UP
mas01cr@239 296 munmap(indata,statbuf.st_size);
mas01cr@239 297 close(infid);
mas01cr@239 298 } while(!filesIn->eof());
mas01cr@239 299
mas01cr@239 300 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
mas01cr@239 301
mas01cr@239 302 // Report status
mas01cr@239 303 status(dbName);
mas01cr@239 304 }