view create.cpp @ 408:f0a69693eaef api-inversion

The lesser of two evils, part 1. Most of the body of audiodb_insert_datum() will apply to "LARGE_ADB"-type insertions: checking for the right flags, checking for enough space free, synchronizing the header. Wouldn't it be nice if we could reuse all that code (or at least the bits that apply) without one horrible almost-identical cut-and-paste job (see batchinsert_large_adb(), or if that's not compelling enough, the four almost-identical query loops from before the Great Refactoring)? Well, yes, it would. Sadly C makes it mildly difficult, because its functions are explicitly typed (so we can't pass arbitrary arguments of other types, even if they're ABI-compatible), while its macros are textual (which makes writing and maintaining them horrible). The thought of a union argument was briefly entertained and then discarded as being just Too Weird. So, instead, (ab)use the oldest trick in the book: void *. Define an adb_datum_internal_t which has void * instead of double *; the intention is that this internal data type can be constructed both from an adb_datum_t and some notional adb_reference_t (which looks very much like an adb_insert_t at the time of writing, with char * structure entries representing filenames). This adb_datum_internal_t structure is very much an internals-only thing, so put its definition in the internals header. Call what was previously audiodb_insert_datum() a new function audiodb_insert_datum_internal(), made static so that really no-one is tempted to call it other than ourselves. audiodb_insert_datum() is then trivial in terms of this new function, if stupidly tedious. (If we were playing dangerously, we could just perform a cast, but relying on the fact that sizeof(double *) = sizeof(void *) would almost certainly end up biting when we least expect it.) Incidental inclusion in this patch, since I noticed it at the time: actually check for the O2_FLAG_L2NORM before scribbling all over the l2norm table. 
Somewhat unsurprisingly, there are as yet no tests to defend against this (harmless, as it turns out) erroneous behaviour.
author mas01cr
date Tue, 09 Dec 2008 20:53:39 +0000
parents a82a2d9b2451
children d7e590d58c85
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
/* Make a new database.

IF size(featuredata) < O2_LARGE_ADB_SIZE
   The database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks;
   * trackTable: Maps implicit feature index to a feature vector
     matrix (sizes of tracks)
   * featureTable: Lots of doubles;
   * timesTable: (start,end) time points for each feature vector;
   * powerTable: associated power for each feature vector;
   * l2normTable: squared l2norms for each feature vector.

ELSE the database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks
   * trackTable: sizes of tracks
   * featureTable: list of feature file names
   * timesTable: list of times file names
   * powerTable: list of power file names

*/

/* audiodb_create(): create a new, empty database file and open it.

   path     - file to create; creation fails if it already exists
              (the file is opened with O_CREAT|O_EXCL).
   datasize - size of the feature data region in units of 1024*1024
              bytes; 0 selects O2_DEFAULT_DATASIZE.
   ntracks  - number of tracks the key/track tables are sized for;
              0 selects O2_DEFAULT_NTRACKS.
   datadim  - divisor used to size the times/power/l2norm tables
              relative to the data region; 0 selects
              O2_DEFAULT_DATADIM.

   Returns whatever audiodb_open(path, O_RDWR) returns on success, or
   NULL if creating, locking, or writing the file fails.  On failure
   after the file has been created, the partially-written file is
   left on disk (the original behaviour; callers must remove it
   before retrying).

   The descriptor used to lay out the file is private to this
   function and is closed on every exit path. */
adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
  int fd = -1;
  adb_header_t *header = 0;
  off_t databytes, auxbytes;

  if(datasize == 0) {
    datasize = O2_DEFAULT_DATASIZE;
  }
  if(ntracks == 0) {
    ntracks = O2_DEFAULT_NTRACKS;
  }
  if(datadim == 0) {
    datadim = O2_DEFAULT_DATADIM;
  }

  if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
    goto error;
  }
  if (acquire_lock(fd, true)) {
    goto error;
  }

  // Zero-fill the header so that padding and any field not assigned
  // below is written to disk as zeros rather than as stale heap
  // contents.
  // NOTE(review): the write() below transfers O2_HEADERSIZE bytes
  // from this buffer; confirm O2_HEADERSIZE <= sizeof(adb_header_t).
  header = (adb_header_t *) calloc(1, sizeof(adb_header_t));
  if(!header) {
    goto error;
  }

  // Initialize header
  header->magic = O2_MAGIC;
  header->version = O2_FORMAT_VERSION;
  header->numFiles = 0;
  header->dim = 0;
  header->flags = 0;
  header->headerSize = O2_HEADERSIZE;
  header->length = 0;
  header->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
  header->trackTableOffset = ALIGN_PAGE_UP(header->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
  header->dataOffset = ALIGN_PAGE_UP(header->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);

  databytes = ((off_t) datasize) * 1024 * 1024;
  auxbytes = databytes / datadim;

  /* FIXME: what's going on here?  There are two distinct
     preprocessor constants (O2_LSH_N_POINT_BITS, LSH_N_POINT_BITS);
     a third is presumably some default
     (O2_DEFAULT_LSH_N_POINT_BITS), and then there's this magic 28
     bits.  Should this really be part of the flags structure at
     all?  Putting it elsewhere will of course break backwards
     compatibility, unless 14 is the only value that's been used
     anywhere... */

  // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
  // If this value is 0 then it will be set to 14

#if O2_LSH_N_POINT_BITS > 15
#error "AudioDB Compile ERROR: consistency check of O2_LSH_POINT_BITS failed (>15)"
#endif

  // Shift in unsigned arithmetic: any value >= 8 shifted left by 28
  // does not fit in a 32-bit signed int.
  header->flags |= ((unsigned) LSH_N_POINT_BITS) << 28;

  // If database will fit in a single file the vectors are copied into the AudioDB instance
  // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
  // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
  if(ntracks<O2_LARGE_ADB_NTRACKS && datasize<O2_LARGE_ADB_SIZE){
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + databytes);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + 2*auxbytes);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + auxbytes);
    header->dbSize = ALIGN_PAGE_UP(header->l2normTableOffset + auxbytes);
  } else { // Create LARGE_ADB, features and powers kept on filesystem
    header->flags |= O2_FLAG_LARGE_ADB;
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->dbSize = header->l2normTableOffset;
  }

  if (write(fd, header, O2_HEADERSIZE) != O2_HEADERSIZE) {
    goto error;
  }

  // Extend the file to its full size: seek to the last byte...
  if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
    goto error;
  }

  // ...and write a single dummy (NUL) byte there.
  if (write (fd, "", 1) != 1) {
      goto error;
  }

  free(header);
  // Close our descriptor *before* reopening.  audiodb_open() obtains
  // its own descriptor from the path, so keeping this one would leak
  // it.  Closing afterwards is also unsafe if acquire_lock() uses
  // POSIX record locks (presumably; confirm), because closing any
  // descriptor for a file drops all of the process's locks on it.
  close(fd);
  return audiodb_open(path, O_RDWR);

 error:
  free(header);
  if(fd >= 0) {
    close(fd);
  }
  return NULL;
}