view create.cpp @ 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents a82a2d9b2451
children d7e590d58c85
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
/* Make a new database.

IF ntracks < O2_LARGE_ADB_NTRACKS and datasize < O2_LARGE_ADB_SIZE
   The database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks;
   * trackTable: Maps implicit feature index to a feature vector
     matrix (sizes of tracks)
   * featureTable: Lots of doubles;
   * timesTable: (start,end) time points for each feature vector;
   * powerTable: associated power for each feature vector;
   * l2normTable: squared l2norms for each feature vector.

ELSE the database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks
   * trackTable: sizes of tracks
   * featureTable: list of feature file names
   * timesTable: list of times file names
   * powerTable: list of power file names

*/

/* Create a new, empty database file at `path` and return a handle to it.
 *
 *   path     - file to create; creation fails if it already exists
 *              (the file is opened with O_CREAT|O_EXCL).
 *   datasize - size of the feature data region in megabytes
 *              (0 selects O2_DEFAULT_DATASIZE).
 *   ntracks  - number of tracks to reserve table space for
 *              (0 selects O2_DEFAULT_NTRACKS).
 *   datadim  - feature dimensionality used to size the auxiliary
 *              tables (0 selects O2_DEFAULT_DATADIM).
 *
 * The header is written at the start of the file and the file is
 * extended to its final size by writing one byte at offset dbSize - 1.
 *
 * Returns the result of audiodb_open(path, O_RDWR) on success, or NULL
 * if any step fails.
 *
 * NOTE(review): on failure after open() succeeded, the partially
 * written file is left on disk, so a retry with the same path will
 * fail because of O_EXCL.
 */
adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
  int fd = -1;
  adb_header_t *header = 0;
  off_t databytes, auxbytes;

  // Zero means "use the compiled-in default" for each sizing parameter.
  if(datasize == 0) {
    datasize = O2_DEFAULT_DATASIZE;
  }
  if(ntracks == 0) {
    ntracks = O2_DEFAULT_NTRACKS;
  }
  if(datadim == 0) {
    datadim = O2_DEFAULT_DATADIM;
  }

  if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
    goto error;
  }
  // A non-zero return from acquire_lock() is treated as failure.
  // NOTE(review): the `true` argument presumably requests an
  // exclusive lock -- confirm against acquire_lock()'s definition.
  if (acquire_lock(fd, true)) {
    goto error;
  }

  // calloc rather than malloc: the raw header bytes are written to
  // disk below, so any field or padding not assigned here must be
  // zero rather than uninitialised heap contents.
  header = (adb_header_t *) calloc(1, sizeof(adb_header_t));
  if(!header) {
    goto error;
  }

  // Initialize header
  header->magic = O2_MAGIC;
  header->version = O2_FORMAT_VERSION;
  header->numFiles = 0;
  header->dim = 0;
  header->flags = 0;
  header->headerSize = O2_HEADERSIZE;
  header->length = 0;
  header->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
  header->trackTableOffset = ALIGN_PAGE_UP(header->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
  header->dataOffset = ALIGN_PAGE_UP(header->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);

  // datasize is in megabytes; widen before multiplying to avoid overflow.
  databytes = ((off_t) datasize) * 1024 * 1024;
  auxbytes = databytes / datadim;

  /* FIXME: what's going on here?  There are two distinct
     preprocessor constants (O2_LSH_N_POINT_BITS, LSH_N_POINT_BITS);
     a third is presumably some default
     (O2_DEFAULT_LSH_N_POINT_BITS), and then there's this magic 28
     bits.  Should this really be part of the flags structure at
     all?  Putting it elsewhere will of course break backwards
     compatibility, unless 14 is the only value that's been used
     anywhere... */

  // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
  // If this value is 0 then it will be set to 14

#if O2_LSH_N_POINT_BITS > 15
#error "AudioDB Compile ERROR: consistency check of O2_LSH_N_POINT_BITS failed (>15)"
#endif

  // Shift as unsigned: values >= 8 shifted left by 28 would otherwise
  // move a bit into the sign position of a signed int.
  header->flags |= ((unsigned) LSH_N_POINT_BITS) << 28;

  // If database will fit in a single file the vectors are copied into the AudioDB instance
  // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
  // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
  if(ntracks<O2_LARGE_ADB_NTRACKS && datasize<O2_LARGE_ADB_SIZE){
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + databytes);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + 2*auxbytes);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + auxbytes);
    header->dbSize = ALIGN_PAGE_UP(header->l2normTableOffset + auxbytes);
  } else { // Create LARGE_ADB, features and powers kept on filesystem
    header->flags |= O2_FLAG_LARGE_ADB;
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->dbSize = header->l2normTableOffset;
  }

  // NOTE(review): this writes O2_HEADERSIZE bytes from a buffer of
  // sizeof(adb_header_t) bytes; confirm the two sizes agree.
  if (write(fd, header, O2_HEADERSIZE) != O2_HEADERSIZE) {
    goto error;
  }

  // go to the location corresponding to the last byte
  if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
    goto error;
  }

  // write a dummy byte at the last location
  if (write (fd, "", 1) != 1) {
    goto error;
  }

  // Release our descriptor before handing over to audiodb_open() so
  // that it is not leaked.  It is closed *before* the reopen on
  // purpose: if acquire_lock() is built on POSIX fcntl() record locks
  // (NOTE(review): confirm), closing any descriptor for the file
  // afterwards would drop locks taken through the new handle.
  if (close(fd) != 0) {
    fd = -1; // do not attempt a second close in the error path
    goto error;
  }
  fd = -1;

  free(header);
  return audiodb_open(path, O_RDWR);

 error:
  if (fd >= 0) {
    close(fd);
  }
  free(header);
  return NULL;
}