annotate common.cpp @ 323:64c844de82d0 large_adb

Fixed an indexing bug where rest of track was ignored after first shingle with power below threshold. Put default O2_LSH_POINT_BITS back to 14 (16384 points), can be altered at compile time with CFLAGS+=-DO2_LSH_POINT_BITS n
author mas01mc
date Thu, 21 Aug 2008 21:02:14 +0000
parents da2272e029b3
children
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@239 3 #if defined(O2_DEBUG)
/* Signal action with an sa_sigaction-style signature (compiled only
   when O2_DEBUG is defined): ends the process with exit status
   128 + signal.  The siginfo and context arguments are not used. */
void sigterm_action(int signal, siginfo_t *info, void *context) {
  (void) info;
  (void) context;
  const int exitStatus = 128 + signal;
  exit(exitStatus);
}
mas01cr@239 7
/* Signal action with an sa_sigaction-style signature (compiled only
   when O2_DEBUG is defined).  Currently a deliberate no-op. */
void sighup_action(int signal, siginfo_t *info, void *context) {
  // FIXME: reread any configuration files
  (void) signal;
  (void) info;
  (void) context;
}
mas01cr@239 11 #endif
mas01cr@239 12
mas01cr@239 13 void audioDB::get_lock(int fd, bool exclusive) {
mas01cr@239 14 struct flock lock;
mas01cr@239 15 int status;
mas01cr@239 16
mas01cr@239 17 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01cr@239 18 lock.l_whence = SEEK_SET;
mas01cr@239 19 lock.l_start = 0;
mas01cr@239 20 lock.l_len = 0; /* "the whole file" */
mas01cr@239 21
mas01cr@239 22 retry:
mas01cr@239 23 do {
mas01cr@239 24 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@239 25 } while (status != 0 && errno == EINTR);
mas01cr@239 26
mas01cr@239 27 if (status) {
mas01cr@239 28 if (errno == EAGAIN) {
mas01cr@239 29 sleep(1);
mas01cr@239 30 goto retry;
mas01cr@239 31 } else {
mas01cr@239 32 error("fcntl lock error", "", "fcntl");
mas01cr@239 33 }
mas01cr@239 34 }
mas01cr@239 35 }
mas01cr@239 36
mas01cr@239 37 void audioDB::release_lock(int fd) {
mas01cr@239 38 struct flock lock;
mas01cr@239 39 int status;
mas01cr@239 40
mas01cr@239 41 lock.l_type = F_UNLCK;
mas01cr@239 42 lock.l_whence = SEEK_SET;
mas01cr@239 43 lock.l_start = 0;
mas01cr@239 44 lock.l_len = 0;
mas01cr@239 45
mas01cr@239 46 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@239 47
mas01cr@239 48 if (status)
mas01cr@239 49 error("fcntl unlock error", "", "fcntl");
mas01cr@239 50 }
mas01cr@239 51
mas01cr@239 52 void audioDB::error(const char* a, const char* b, const char *sysFunc) {
mas01cr@239 53 if(isServer) {
mas01cr@239 54 /* FIXME: I think this is leaky -- we never delete err. actually
mas01cr@239 55 deleting it is tricky, though; it gets placed into some
mas01cr@239 56 soap-internal struct with uncertain extent... -- CSR,
mas01cr@239 57 2007-10-01 */
mas01cr@239 58 char *err = new char[256]; /* FIXME: overflows */
mas01cr@239 59 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
mas01cr@239 60 /* FIXME: actually we could usefully do with a properly structured
mas01cr@239 61 type, so that we can throw separate faultstring and details.
mas01cr@239 62 -- CSR, 2007-10-01 */
mas01cr@239 63 throw(err);
mas01cr@239 64 } else {
mas01cr@239 65 std::cerr << a << ": " << b << std::endl;
mas01cr@239 66 if (sysFunc) {
mas01cr@239 67 perror(sysFunc);
mas01cr@239 68 }
mas01cr@239 69 exit(1);
mas01cr@239 70 }
mas01cr@239 71 }
mas01cr@239 72
mas01cr@284 73 void audioDB::initRNG() {
mas01cr@284 74 rng = gsl_rng_alloc(gsl_rng_mt19937);
mas01cr@284 75 if(!rng) {
mas01cr@284 76 error("could not allocate Random Number Generator");
mas01cr@284 77 }
mas01cr@284 78 /* FIXME: maybe we should use a real source of entropy? */
mas01cr@284 79 gsl_rng_set(rng, time(NULL));
mas01cr@284 80 }
mas01cr@284 81
mas01cr@239 82 void audioDB::initDBHeader(const char* dbName) {
mas01cr@239 83 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
mas01cr@239 84 error("Can't open database file", dbName, "open");
mas01cr@239 85 }
mas01cr@239 86
mas01cr@239 87 get_lock(dbfid, forWrite);
mas01cr@239 88 // Get the database header info
mas01cr@239 89 dbH = new dbTableHeaderT();
mas01cr@239 90 assert(dbH);
mas01cr@239 91
mas01cr@239 92 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@239 93 error("error reading db header", dbName, "read");
mas01cr@239 94 }
mas01cr@239 95
mas01cr@239 96 if(dbH->magic == O2_OLD_MAGIC) {
mas01cr@239 97 // FIXME: if anyone ever complains, write the program to convert
mas01cr@239 98 // from the old audioDB format to the new...
mas01cr@239 99 error("database file has old O2 header", dbName);
mas01cr@239 100 }
mas01cr@239 101
mas01cr@239 102 if(dbH->magic != O2_MAGIC) {
mas01cr@239 103 std::cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << std::endl;
mas01cr@239 104 error("database file has incorrect header", dbName);
mas01cr@239 105 }
mas01cr@239 106
mas01cr@239 107 if(dbH->version != O2_FORMAT_VERSION) {
mas01cr@239 108 error("database file has incorrect version", dbName);
mas01cr@239 109 }
mas01cr@239 110
mas01cr@239 111 if(dbH->headerSize != O2_HEADERSIZE) {
mas01cr@239 112 error("sizeof(dbTableHeader) unexpected: platform ABI mismatch?", dbName);
mas01cr@239 113 }
mas01cr@239 114
mas01cr@239 115 CHECKED_MMAP(char *, db, 0, getpagesize());
mas01cr@239 116
mas01cr@239 117 // Make some handy tables with correct types
mas01cr@239 118 if(forWrite || (dbH->length > 0)) {
mas01cr@239 119 if(forWrite) {
mas01cr@239 120 fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset;
mas01cr@239 121 trackTableLength = dbH->dataOffset - dbH->trackTableOffset;
mas01cr@239 122 dataBufLength = dbH->timesTableOffset - dbH->dataOffset;
mas01cr@239 123 timesTableLength = dbH->powerTableOffset - dbH->timesTableOffset;
mas01cr@239 124 powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset;
mas01cr@239 125 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
mas01cr@239 126 } else {
mas01cr@256 127 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
mas01cr@256 128 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLE_ENTRY_SIZE);
mas01mc@318 129 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@318 130 dataBufLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
mas01mc@318 131 timesTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
mas01mc@318 132 powerTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
mas01mc@318 133 l2normTableLength = 0;
mas01mc@318 134 }
mas01mc@318 135 else{
mas01mc@318 136 dataBufLength = ALIGN_PAGE_UP(dbH->length);
mas01mc@318 137 timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim));
mas01mc@318 138 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
mas01mc@318 139 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
mas01mc@318 140 }
mas01cr@239 141 }
mas01cr@239 142 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength);
mas01cr@239 143 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength);
mas01cr@239 144 /*
mas01cr@239 145 * No more mmap() for dataBuf
mas01cr@239 146 *
mas01cr@239 147 * FIXME: Actually we do do the mmap() in the two cases where it's
mas01cr@239 148 * still "needed": in pointQuery and in l2norm if dbH->length is
mas01cr@239 149 * non-zero. Removing those cases too (and deleting the dataBuf
mas01cr@239 150 * variable completely) would be cool. -- CSR, 2007-11-19
mas01cr@239 151 *
mas01cr@239 152 * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@239 153 */
mas01mc@318 154 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@318 155 CHECKED_MMAP(char *, featureFileNameTable, dbH->dataOffset, fileTableLength);
mas01mc@318 156 if( dbH->flags & O2_FLAG_TIMES )
mas01mc@318 157 CHECKED_MMAP(char *, timesFileNameTable, dbH->timesTableOffset, fileTableLength);
mas01mc@318 158 if( dbH->flags & O2_FLAG_POWER )
mas01mc@318 159 CHECKED_MMAP(char *, powerFileNameTable, dbH->powerTableOffset, fileTableLength);
mas01mc@318 160 }
mas01mc@318 161 else{
mas01mc@318 162 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength);
mas01mc@318 163 CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength);
mas01mc@318 164 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength);
mas01mc@318 165 }
mas01cr@239 166 }
mas01mc@292 167
mas01mc@292 168 // build track offset table
mas01mc@292 169 trackOffsetTable = new off_t[dbH->numFiles];
mas01mc@292 170 Uns32T cumTrack=0;
mas01mc@292 171 for(Uns32T k = 0; k < dbH->numFiles; k++){
mas01mc@292 172 trackOffsetTable[k] = cumTrack;
mas01mc@292 173 cumTrack += trackTable[k] * dbH->dim;
mas01mc@319 174 }
mas01mc@319 175
mas01mc@319 176 // Assign correct number of point bits per track in LSH indexing / retrieval
mas01mc@319 177 lsh_n_point_bits = dbH->flags >> 28;
mas01mc@319 178 if( !lsh_n_point_bits )
mas01mc@319 179 lsh_n_point_bits = O2_DEFAULT_LSH_N_POINT_BITS;
mas01cr@239 180 }
mas01cr@239 181
mas01mc@316 182 void audioDB::initInputFile (const char *inFile, bool loadData) {
mas01cr@239 183 if (inFile) {
mas01cr@239 184 if ((infid = open(inFile, O_RDONLY)) < 0) {
mas01cr@239 185 error("can't open input file for reading", inFile, "open");
mas01cr@239 186 }
mas01cr@239 187
mas01cr@239 188 if (fstat(infid, &statbuf) < 0) {
mas01cr@239 189 error("fstat error finding size of input", inFile, "fstat");
mas01cr@239 190 }
mas01cr@239 191
mas01cr@239 192 if(dbH->dim == 0 && dbH->length == 0) { // empty database
mas01cr@239 193 // initialize with input dimensionality
mas01cr@239 194 if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) {
mas01cr@239 195 error("short read of input file", inFile);
mas01cr@239 196 }
mas01cr@239 197 if(dbH->dim == 0) {
mas01cr@239 198 error("dimensionality of zero in input file", inFile);
mas01cr@239 199 }
mas01cr@239 200 } else {
mas01cr@239 201 unsigned test;
mas01cr@239 202 if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) {
mas01cr@239 203 error("short read of input file", inFile);
mas01cr@239 204 }
mas01cr@239 205 if(dbH->dim == 0) {
mas01cr@239 206 error("dimensionality of zero in input file", inFile);
mas01cr@239 207 }
mas01cr@239 208 if(dbH->dim != test) {
mas01cr@239 209 std::cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<std::endl;
mas01cr@239 210 error("feature dimensions do not match database table dimensions", inFile);
mas01cr@239 211 }
mas01cr@239 212 }
mas01cr@239 213
mas01mc@316 214 if (loadData && ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1)) {
mas01cr@239 215 error("mmap error for input", inFile, "mmap");
mas01cr@239 216 }
mas01cr@239 217 }
mas01cr@239 218 }
mas01cr@239 219
mas01mc@292 220 void audioDB::initTables(const char* dbName, const char* inFile) {
mas01cr@284 221 /* FIXME: initRNG() really logically belongs in the audioDB
mas01cr@284 222 contructor. However, there are of the order of four constructors
mas01cr@284 223 at the moment, and more to come from API implementation. Given
mas01cr@284 224 that duplication, I think this is the least worst place to put
mas01cr@284 225 it; the assumption is that nothing which doesn't look at a
mas01cr@284 226 database will need an RNG. -- CSR, 2008-07-02 */
mas01cr@284 227 initRNG();
mas01cr@239 228 initDBHeader(dbName);
mas01mc@292 229 if(inFile)
mas01mc@292 230 initInputFile(inFile);
mas01cr@239 231 }
mas01mc@292 232
mas01mc@321 233 // If name is relative path, side effect name with prefix/name
mas01mc@321 234 // Do not free original pointer
mas01mc@321 235 void audioDB::prefix_name(char** const name, const char* prefix){
mas01mc@321 236 // No prefix if prefix is empty
mas01mc@321 237 if(!prefix)
mas01mc@321 238 return;
mas01mc@321 239 // Allocate new memory, keep old memory
mas01mc@321 240 assert(name && *name);
mas01mc@321 241 if (strlen(*name) + strlen(prefix) + 1 > O2_MAXFILESTR)
mas01mc@321 242 error("error: path prefix + filename too long",prefix);
mas01mc@321 243 // Do not prefix absolute path+filename
mas01mc@321 244 if(**name=='/')
mas01mc@321 245 return;
mas01mc@321 246 // OK to prefix relative path+filename
mas01mc@321 247 char* prefixedName = (char*) malloc(O2_MAXFILESTR);
mas01mc@321 248 sprintf(prefixedName, "%s/%s", prefix, *name);
mas01mc@321 249 *name = prefixedName; // side effect new name to old name
mas01mc@321 250 }