annotate insert.cpp @ 213:78a144448bda refactoring

Use "override CFLAGS+=" in Makefile. Allows e.g. "make CFLAGS=-pg" and still getting "-g -O3"
author mas01cr
date Tue, 04 Dec 2007 09:16:48 +0000
parents 2ea1908707c7
children 0eab3ca2267d
rev   line source
mas01cr@204 1 #include "audioDB.h"
mas01cr@204 2
mas01cr@204 3 bool audioDB::enough_data_space_free(off_t size) {
mas01cr@204 4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@204 5 }
mas01cr@204 6
mas01cr@204 7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@204 8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
mas01cr@204 9 write(dbfid, buffer, size);
mas01cr@204 10 }
mas01cr@204 11
mas01cr@204 12 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@204 13 forWrite = true;
mas01cr@204 14 initTables(dbName, inFile);
mas01cr@204 15
mas01cr@204 16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 17 error("Must use timestamps with timestamped database","use --times");
mas01cr@204 18
mas01cr@204 19 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@204 20 error("Must use power with power-enabled database", dbName);
mas01cr@204 21
mas01cr@204 22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@204 23 error("Insert failed: no more room in database", inFile);
mas01cr@204 24 }
mas01cr@204 25
mas01cr@204 26 if(!key)
mas01cr@204 27 key=inFile;
mas01cr@204 28 // Linear scan of filenames check for pre-existing feature
mas01cr@204 29 unsigned alreadyInserted=0;
mas01cr@204 30 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@204 31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
mas01cr@204 32 alreadyInserted=1;
mas01cr@204 33 break;
mas01cr@204 34 }
mas01cr@204 35
mas01cr@204 36 if(alreadyInserted){
mas01cr@204 37 if(verbosity) {
mas01cr@204 38 std::cerr << "Warning: key already exists in database, ignoring: " <<inFile << std::endl;
mas01cr@204 39 }
mas01cr@204 40 return;
mas01cr@204 41 }
mas01cr@204 42
mas01cr@204 43 // Make a track index table of features to file indexes
mas01cr@204 44 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 45 if(!numVectors){
mas01cr@204 46 if(verbosity) {
mas01cr@204 47 std::cerr << "Warning: ignoring zero-length feature vector file:" << key << std::endl;
mas01cr@204 48 }
mas01cr@204 49 // CLEAN UP
mas01cr@204 50 munmap(indata,statbuf.st_size);
mas01cr@204 51 munmap(db,dbH->dbSize);
mas01cr@204 52 close(infid);
mas01cr@204 53 return;
mas01cr@204 54 }
mas01cr@204 55
mas01cr@204 56 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@204 57
mas01cr@204 58 off_t insertoffset = dbH->length;// Store current state
mas01cr@204 59
mas01cr@204 60 // Check times status and insert times from file
mas01cr@204 61 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
mas01cr@204 62 double *timesdata = timesTable + 2*indexoffset;
mas01cr@204 63
mas01cr@204 64 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@204 65 error("out of space for times", key);
mas01cr@204 66 }
mas01cr@204 67
mas01cr@204 68 if (usingTimes) {
mas01cr@204 69 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 70 }
mas01cr@204 71
mas01cr@204 72 double *powerdata = powerTable + indexoffset;
mas01cr@204 73 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@204 74
mas01cr@204 75 // Increment file count
mas01cr@204 76 dbH->numFiles++;
mas01cr@204 77
mas01cr@204 78 // Update Header information
mas01cr@204 79 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@204 80
mas01cr@204 81 // Update track to file index map
mas01cr@204 82 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
mas01cr@204 83
mas01cr@204 84 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@204 85
mas01cr@204 86 // Norm the vectors on input if the database is already L2 normed
mas01cr@204 87 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@204 88 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@204 89
mas01cr@204 90 // Report status
mas01cr@204 91 status(dbName);
mas01cr@204 92 if(verbosity) {
mas01cr@204 93 std::cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
mas01cr@204 94 << (statbuf.st_size-sizeof(int)) << " bytes." << std::endl;
mas01cr@204 95 }
mas01cr@204 96
mas01cr@204 97 // Copy the header back to the database
mas01cr@204 98 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@204 99
mas01cr@204 100 // CLEAN UP
mas01cr@204 101 munmap(indata,statbuf.st_size);
mas01cr@204 102 close(infid);
mas01cr@204 103 }
mas01cr@204 104
mas01cr@204 105 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@204 106 assert(usingTimes);
mas01cr@204 107
mas01cr@204 108 unsigned numtimes = 0;
mas01cr@204 109
mas01cr@204 110 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
mas01cr@204 111 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@204 112 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@204 113 error("Timestamp file used with non-timestamped database", timesFileName);
mas01cr@204 114 }
mas01cr@204 115
mas01cr@204 116 if(!timesFile->is_open()) {
mas01cr@204 117 error("problem opening times file on timestamped database", timesFileName);
mas01cr@204 118 }
mas01cr@204 119
mas01cr@204 120 double timepoint, next;
mas01cr@204 121 *timesFile >> timepoint;
mas01cr@204 122 if (timesFile->eof()) {
mas01cr@204 123 error("no entries in times file", timesFileName);
mas01cr@204 124 }
mas01cr@204 125 numtimes++;
mas01cr@204 126 do {
mas01cr@204 127 *timesFile >> next;
mas01cr@204 128 if (timesFile->eof()) {
mas01cr@204 129 break;
mas01cr@204 130 }
mas01cr@204 131 numtimes++;
mas01cr@204 132 timesdata[0] = timepoint;
mas01cr@204 133 timepoint = (timesdata[1] = next);
mas01cr@204 134 timesdata += 2;
mas01cr@204 135 } while (numtimes < numVectors + 1);
mas01cr@204 136
mas01cr@204 137 if (numtimes < numVectors + 1) {
mas01cr@204 138 error("too few timepoints in times file", timesFileName);
mas01cr@204 139 }
mas01cr@204 140
mas01cr@204 141 *timesFile >> next;
mas01cr@204 142 if (!timesFile->eof()) {
mas01cr@204 143 error("too many timepoints in times file", timesFileName);
mas01cr@204 144 }
mas01cr@204 145 }
mas01cr@204 146
mas01cr@204 147 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01cr@204 148 if (usingPower) {
mas01cr@204 149 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@204 150 error("Cannot insert power data on non-power DB", dbName);
mas01cr@204 151 }
mas01cr@204 152
mas01cr@204 153 int one;
mas01cr@204 154 unsigned int count;
mas01cr@204 155
mas01cr@204 156 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@204 157 if (count != sizeof(unsigned int)) {
mas01cr@204 158 error("powerfd read failed", "int", "read");
mas01cr@204 159 }
mas01cr@204 160 if (one != 1) {
mas01cr@204 161 error("dimensionality of power file not 1", powerFileName);
mas01cr@204 162 }
mas01cr@204 163
mas01cr@204 164 // FIXME: should check that the powerfile is the right size for
mas01cr@204 165 // this. -- CSR, 2007-10-30
mas01cr@204 166 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@204 167 if (count != numVectors * sizeof(double)) {
mas01cr@204 168 error("powerfd read failed", "double", "read");
mas01cr@204 169 }
mas01cr@204 170 }
mas01cr@204 171 }
mas01cr@204 172
mas01cr@204 173 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@204 174
mas01cr@204 175 forWrite = true;
mas01cr@204 176 initDBHeader(dbName);
mas01cr@204 177
mas01cr@204 178 if(!key)
mas01cr@204 179 key=inFile;
mas01cr@204 180 std::ifstream *filesIn = 0;
mas01cr@204 181 std::ifstream *keysIn = 0;
mas01cr@204 182 std::ifstream* thisTimesFile = 0;
mas01cr@204 183 int thispowerfd = 0;
mas01cr@204 184
mas01cr@204 185 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@204 186 error("Could not open batch in file", inFile);
mas01cr@204 187 if(key && key!=inFile)
mas01cr@204 188 if(!(keysIn = new std::ifstream(key)))
mas01cr@204 189 error("Could not open batch key file",key);
mas01cr@204 190
mas01cr@204 191 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 192 error("Must use timestamps with timestamped database","use --times");
mas01cr@204 193
mas01cr@204 194 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@204 195 error("Must use power with power-enabled database", dbName);
mas01cr@204 196
mas01cr@204 197 unsigned totalVectors=0;
mas01cr@204 198 char *thisKey = new char[MAXSTR];
mas01cr@204 199 char *thisFile = new char[MAXSTR];
mas01cr@204 200 char *thisTimesFileName = new char[MAXSTR];
mas01cr@204 201 char *thisPowerFileName = new char[MAXSTR];
mas01cr@204 202
mas01cr@204 203 do{
mas01cr@204 204 filesIn->getline(thisFile,MAXSTR);
mas01cr@204 205 if(key && key!=inFile)
mas01cr@204 206 keysIn->getline(thisKey,MAXSTR);
mas01cr@204 207 else
mas01cr@204 208 thisKey = thisFile;
mas01cr@204 209 if(usingTimes)
mas01cr@204 210 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@204 211 if(usingPower)
mas01cr@204 212 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@204 213
mas01cr@204 214 if(filesIn->eof())
mas01cr@204 215 break;
mas01cr@204 216
mas01cr@204 217 initInputFile(thisFile);
mas01cr@204 218
mas01cr@204 219 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@204 220 error("batchinsert failed: no more room in database", thisFile);
mas01cr@204 221 }
mas01cr@204 222
mas01cr@204 223 // Linear scan of filenames check for pre-existing feature
mas01cr@204 224 unsigned alreadyInserted=0;
mas01cr@204 225
mas01cr@204 226 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@204 227 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
mas01cr@204 228 alreadyInserted=1;
mas01cr@204 229 break;
mas01cr@204 230 }
mas01cr@204 231
mas01cr@204 232 if(alreadyInserted){
mas01cr@204 233 if(verbosity) {
mas01cr@204 234 std::cerr << "Warning: key already exists in database:" << thisKey << std::endl;
mas01cr@204 235 }
mas01cr@204 236 }
mas01cr@204 237 else{
mas01cr@204 238
mas01cr@204 239 // Make a track index table of features to file indexes
mas01cr@204 240 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 241 if(!numVectors){
mas01cr@204 242 if(verbosity) {
mas01cr@204 243 std::cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << std::endl;
mas01cr@204 244 }
mas01cr@204 245 }
mas01cr@204 246 else{
mas01cr@204 247 if(usingTimes){
mas01cr@204 248 if(timesFile->eof()) {
mas01cr@204 249 error("not enough timestamp files in timesList", timesFileName);
mas01cr@204 250 }
mas01cr@204 251 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
mas01cr@204 252 if(!thisTimesFile->is_open()) {
mas01cr@204 253 error("Cannot open timestamp file", thisTimesFileName);
mas01cr@204 254 }
mas01cr@204 255 off_t insertoffset = dbH->length;
mas01cr@204 256 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
mas01cr@204 257 double *timesdata = timesTable + 2*indexoffset;
mas01cr@204 258 if(2*(indexoffset + numVectors) > timesTableLength) {
mas01cr@204 259 error("out of space for times", key);
mas01cr@204 260 }
mas01cr@204 261 insertTimeStamps(numVectors, thisTimesFile, timesdata);
mas01cr@204 262 if(thisTimesFile)
mas01cr@204 263 delete thisTimesFile;
mas01cr@204 264 }
mas01cr@204 265
mas01cr@204 266 if (usingPower) {
mas01cr@204 267 if(powerFile->eof()) {
mas01cr@204 268 error("not enough power files in powerList", powerFileName);
mas01cr@204 269 }
mas01cr@204 270 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@204 271 if (thispowerfd < 0) {
mas01cr@204 272 error("failed to open power file", thisPowerFileName);
mas01cr@204 273 }
mas01cr@204 274 unsigned insertoffset = dbH->length;
mas01cr@204 275 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@204 276 double *powerdata = powerTable + poweroffset;
mas01cr@204 277 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@204 278 if (0 < thispowerfd) {
mas01cr@204 279 close(thispowerfd);
mas01cr@204 280 }
mas01cr@204 281 }
mas01cr@204 282 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@204 283
mas01cr@204 284 off_t insertoffset = dbH->length;// Store current state
mas01cr@204 285
mas01cr@204 286 // Increment file count
mas01cr@204 287 dbH->numFiles++;
mas01cr@204 288
mas01cr@204 289 // Update Header information
mas01cr@204 290 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@204 291
mas01cr@204 292 // Update track to file index map
mas01cr@204 293 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@204 294
mas01cr@204 295 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@204 296
mas01cr@204 297 // Norm the vectors on input if the database is already L2 normed
mas01cr@204 298 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@204 299 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
mas01cr@204 300
mas01cr@204 301 totalVectors+=numVectors;
mas01cr@204 302
mas01cr@204 303 // Copy the header back to the database
mas01cr@204 304 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@204 305 }
mas01cr@204 306 }
mas01cr@204 307 // CLEAN UP
mas01cr@204 308 munmap(indata,statbuf.st_size);
mas01cr@204 309 close(infid);
mas01cr@204 310 }while(!filesIn->eof());
mas01cr@204 311
mas01cr@204 312 if(verbosity) {
mas01cr@204 313 std::cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
mas01cr@204 314 << totalVectors*dbH->dim*sizeof(double) << " bytes." << std::endl;
mas01cr@204 315 }
mas01cr@204 316
mas01cr@204 317 // Report status
mas01cr@204 318 status(dbName);
mas01cr@204 319 }