view common.cpp @ 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents 58b88ab69424
children 1e748d834e24
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
#include "audioDB-internals.h"
}

#if defined(O2_DEBUG)
void sigterm_action(int signal, siginfo_t *info, void *context) {
  exit(128+signal);
}

void sighup_action(int signal, siginfo_t *info, void *context) {
  // FIXME: reread any configuration files
}
#endif

int acquire_lock(int fd, bool exclusive) {
  struct flock lock;
  int status;
  
  lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0; /* "the whole file" */

 retry:
  do {
    status = fcntl(fd, F_SETLKW, &lock);
  } while (status != 0 && errno == EINTR);
  
  if (status) {
    if (errno == EAGAIN) {
      sleep(1);
      goto retry;
    } else {
      return status;
    }
  }
  return 0;
}

int divest_lock(int fd) {
  struct flock lock;

  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0;

  return fcntl(fd, F_SETLKW, &lock);
}

void audioDB::get_lock(int fd, bool exclusive) {
  if(acquire_lock(fd, exclusive)) {
    error("fcntl lock error", "", "fcntl");
  }
}

void audioDB::release_lock(int fd) {
  if (divest_lock(fd)) {
    error("fcntl unlock error", "", "fcntl");
  }
}

void audioDB::error(const char* a, const char* b, const char *sysFunc) {
 

    if(isServer) {
        /* FIXME: I think this is leaky -- we never delete err.
           actually deleting it is tricky, though; it gets placed into
           some soap-internal struct with uncertain extent... -- CSR,
           2007-10-01 */
        char *err = new char[256]; /* FIXME: overflows */
        snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
        /* FIXME: actually we could usefully do with a properly
           structured type, so that we can throw separate faultstring
           and details.  -- CSR, 2007-10-01 */
        throw(err);
    } else if (UseApiError){
        apierrortemp=-1;
        throw(apierrortemp);
    } else {
        std::cerr << a << ": " << b << std::endl;
        if (sysFunc) {
            perror(sysFunc);
        }
        exit(1);
    }

}

void audioDB::initRNG() {
  rng = gsl_rng_alloc(gsl_rng_mt19937);
  if(!rng) {
    error("could not allocate Random Number Generator");
  }
  /* FIXME: maybe we should use a real source of entropy? */
  gsl_rng_set(rng, time(NULL));
}

void audioDB::initDBHeader(const char* dbName) {
  if(!adb) {
    adb = audiodb_open(dbName, forWrite ? O_RDWR : O_RDONLY);
    if(!adb) {
      error("Failed to open database", dbName);
    }
  }
  dbfid = adb->fd;
  dbH = adb->header;

  CHECKED_MMAP(char *, db, 0, getpagesize());

  // Make some handy tables with correct types
  if(forWrite || (dbH->length > 0)) {
    if(forWrite) {
      fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset;
      trackTableLength = dbH->dataOffset - dbH->trackTableOffset;
      dataBufLength = dbH->timesTableOffset - dbH->dataOffset;
      timesTableLength = dbH->powerTableOffset - dbH->timesTableOffset;
      powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset;
      l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
    } else {
      fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
      trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLE_ENTRY_SIZE);
      if( dbH->flags & O2_FLAG_LARGE_ADB ){
	dataBufLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
	timesTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
	powerTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
	l2normTableLength = 0;
      }
      else{
	dataBufLength = ALIGN_PAGE_UP(dbH->length);
	timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim));
	powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
	l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
      }
    }
    CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength);
    CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength);
    /*
     * No more mmap() for dataBuf
     *
     * FIXME: Actually we do do the mmap() in the two cases where it's
     * still "needed": in pointQuery and in l2norm if dbH->length is
     * non-zero.  Removing those cases too (and deleting the dataBuf
     * variable completely) would be cool.  -- CSR, 2007-11-19
     *
     * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
     */
    if( dbH->flags & O2_FLAG_LARGE_ADB ){
      CHECKED_MMAP(char *, featureFileNameTable, dbH->dataOffset, fileTableLength);
      if( dbH->flags & O2_FLAG_TIMES )
	CHECKED_MMAP(char *, timesFileNameTable, dbH->timesTableOffset, fileTableLength);
      if( dbH->flags & O2_FLAG_POWER )
	CHECKED_MMAP(char *, powerFileNameTable, dbH->powerTableOffset, fileTableLength);
    }
    else{
      CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength);
      CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength);
      CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength);
    }
  }

  // build track offset table
  trackOffsetTable = new off_t[dbH->numFiles];
  Uns32T cumTrack=0;
  for(Uns32T k = 0; k < dbH->numFiles; k++){
    trackOffsetTable[k] = cumTrack;
    cumTrack += trackTable[k] * dbH->dim;
  }

  // Assign correct number of point bits per track in LSH indexing / retrieval
  lsh_n_point_bits = dbH->flags >> 28;
  if( !lsh_n_point_bits )
    lsh_n_point_bits = O2_DEFAULT_LSH_N_POINT_BITS;
}

void audioDB::initInputFile (const char *inFile, bool loadData) {
  if (inFile) {
    if ((infid = open(inFile, O_RDONLY)) < 0) {
      error("can't open input file for reading", inFile, "open");
    }

    if (fstat(infid, &statbuf) < 0) {
      error("fstat error finding size of input", inFile, "fstat");
    }

    if(dbH->dim == 0 && dbH->length == 0) { // empty database
      // initialize with input dimensionality
      if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) {
        error("short read of input file", inFile);
      }
      if(dbH->dim == 0) {
        error("dimensionality of zero in input file", inFile);
      }
    } else {
      unsigned test;
      if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) {
        error("short read of input file", inFile);
      }
      if(dbH->dim == 0) {
        error("dimensionality of zero in input file", inFile);
      }
      if(dbH->dim != test) {      
	std::cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<std::endl;
	error("feature dimensions do not match database table dimensions", inFile);
      }
    }
    
    if (loadData && ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1)) {
      error("mmap error for input", inFile, "mmap");
    }
  }
}

void audioDB::initTables(const char* dbName, const char* inFile) {
  /* FIXME: initRNG() really logically belongs in the audioDB
     contructor.  However, there are of the order of four constructors
     at the moment, and more to come from API implementation.  Given
     that duplication, I think this is the least worst place to put
     it; the assumption is that nothing which doesn't look at a
     database will need an RNG.  -- CSR, 2008-07-02 */
  initRNG();
  initDBHeader(dbName);
  if(inFile)
    initInputFile(inFile);
}

// If name is relative path, side effect name with prefix/name
// Do not free original pointer
void audioDB::prefix_name(char** const name, const char* prefix){
  // No prefix if prefix is empty
  if(!prefix)
    return;
  // Allocate new memory, keep old memory
  assert(name && *name);
  if (strlen(*name) + strlen(prefix) + 1 > O2_MAXFILESTR)
    error("error: path prefix + filename too long",prefix);
  // Do not prefix absolute path+filename
  if(**name=='/')
    return;
  // OK to prefix relative path+filename
  char* prefixedName = (char*) malloc(O2_MAXFILESTR);
  sprintf(prefixedName, "%s/%s", prefix, *name);
  *name = prefixedName; // side effect new name to old name
}