view create.cpp @ 548:e18843dc0aea

Implement a rudimentary API for audioDB::liszt. The API is rudimentary because we've dropped support for the incremental retrieval of tracks and their number of vectors (at the API level; the SOAP and command-line support is still there -- no changes should be visible). This is potentially bad for large-scale databases, of course; one million tracks will take of the order of 16MB of RAM, more if I'm unlucky about how std::string.c_str() is implemented. Both this liszt operation and querying (and sampling, forthcoming...) would benefit from a `cursor-like' interface to retrieval results: for an API like that, instead of getting a struct with the data there, you get a cookie with which you can ask the database for successive results. This would be neat for all sorts of reasons. In the meantime, at least this change fixes SOAP memory leaks related to liszt. Make liszt.o part of LIBOBJS rather than ordinary OBJS, so that the liszt functionality is actually compiled into the library. Add a test for this library functionality; also modify the command-line test file to run the SOAP server on its own port.
author mas01cr
date Wed, 11 Feb 2009 12:38:03 +0000
parents 57e459f62788
children 4eedc18634f5
line wrap: on
line source
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"

/* Make a new database.

(FIXME: this text, in particular the conditional, will not be true 
once we implement create flags rather than defaulting on format based
on the requested size arguments)

IF ntracks < ADB_FIXME_LARGE_ADB_NTRACKS AND datasize < ADB_FIXME_LARGE_ADB_SIZE
   The database consists of:

   * a header (see adb_header_t definition);
   * keyTable: list of keys of tracks;
   * trackTable: Maps implicit feature index to a feature vector
     matrix (sizes of tracks)
   * featureTable: Lots of doubles;
   * timesTable: (start,end) time points for each feature vector;
   * powerTable: associated power for each feature vector;
   * l2normTable: squared l2norms for each feature vector.

ELSE the database consists of:

   * a header (see adb_header_t definition);
   * keyTable: list of keys of tracks
   * trackTable: sizes of tracks
   * featureTable: list of feature file names
   * timesTable: list of times file names
   * powerTable: list of power file names

*/

/* Create a new, empty database file at `path`, then open it.
 *
 * path     - location of the new file; because of O_EXCL the call fails
 *            if something already exists there.
 * datasize - size of the feature data region in megabytes; 0 selects
 *            ADB_DEFAULT_DATASIZE.
 * ntracks  - number of tracks to reserve table entries for; 0 selects
 *            ADB_DEFAULT_NTRACKS.
 * datadim  - divisor used to size the auxiliary (times/power/l2norm)
 *            regions relative to the data region; 0 selects
 *            ADB_DEFAULT_DATADIM.  Note that header->dim itself is
 *            written as 0 regardless of this argument.
 *
 * Returns whatever audiodb_open(path, O_RDWR) returns on success, or NULL
 * if any step fails.  On failure after the file has been created, the
 * partially written file is left on disk (it is not unlinked here).
 */
adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
  int fd = -1;
  adb_header_t *header = 0;
  off_t databytes, auxbytes;

  // A zero argument means "use the compiled-in default".
  if(datasize == 0) {
    datasize = ADB_DEFAULT_DATASIZE;
  }
  if(ntracks == 0) {
    ntracks = ADB_DEFAULT_NTRACKS;
  }
  if(datadim == 0) {
    datadim = ADB_DEFAULT_DATADIM;
  }

  if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
    goto error;
  }
  if (acquire_lock(fd, true)) {
    goto error;
  }

  header = (adb_header_t *) malloc(sizeof(adb_header_t));
  if(!header) {
    goto error;
  }

  // Initialize header
  header->magic = ADB_MAGIC;
  header->version = ADB_FORMAT_VERSION;
  header->numFiles = 0;
  header->dim = 0;
  header->flags = 0;
  header->headerSize = ADB_HEADER_SIZE;
  header->length = 0;
  header->fileTableOffset = align_page_up(ADB_HEADER_SIZE);
  header->trackTableOffset = align_page_up(header->fileTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
  header->dataOffset = align_page_up(header->trackTableOffset + ADB_TRACKTABLE_ENTRY_SIZE*ntracks);

  // datasize is expressed in megabytes; widen before multiplying so the
  // byte count is computed in off_t rather than unsigned.
  databytes = ((off_t) datasize) * 1024 * 1024;
  auxbytes = databytes / datadim;

  // If database will fit in a single file the vectors are copied into the AudioDB instance
  // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
  // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
  if(ntracks < ADB_FIXME_LARGE_ADB_NTRACKS && datasize < ADB_FIXME_LARGE_ADB_SIZE) {
    header->timesTableOffset = align_page_up(header->dataOffset + databytes);
    header->powerTableOffset = align_page_up(header->timesTableOffset + 2*auxbytes);
    header->l2normTableOffset = align_page_up(header->powerTableOffset + auxbytes);
    header->dbSize = align_page_up(header->l2normTableOffset + auxbytes);
  } else { // Create REFERENCES ADB, features and powers kept on filesystem
    header->flags |= ADB_HEADER_FLAG_REFERENCES;
    header->timesTableOffset = align_page_up(header->dataOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
    header->powerTableOffset = align_page_up(header->timesTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
    header->l2normTableOffset = align_page_up(header->powerTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
    header->dbSize = header->l2normTableOffset;
  }

  write_or_goto_error(fd, header, ADB_HEADER_SIZE);

  // go to the location corresponding to the last byte
  if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
    goto error;
  }

  // write a dummy byte at the last location, extending the file to dbSize
  write_or_goto_error(fd, "", 1);

  free(header);
  header = 0;

  // audiodb_open() reopens the file by path, so the creation descriptor is
  // no longer needed.  Close it *before* reopening: previously it was never
  // closed and leaked on every call.  (NOTE(review): if acquire_lock uses
  // fcntl record locks, closing this descriptor after audiodb_open() would
  // drop the lock taken through the new descriptor -- confirm before
  // reordering.)
  if (close(fd) < 0) {
    // The descriptor must not be closed a second time on the error path.
    fd = -1;
    goto error;
  }
  return audiodb_open(path, O_RDWR);

 error:
  // fd is still -1 (or negative) if open() itself failed.
  if(fd >= 0) {
    close(fd);
  }
  free(header);
  return NULL;
}