view create.cpp @ 392:78fed0d4c108 api-inversion

Include some necessary information in struct adb. Now the struct adb contains a database fd, the flags used to open that fd (so that we can later tell if it was for write or not) and a database header pointer. audiodb_open() is now responsible for filling in all of that information. To do that, it needs to take an open(2) flag; that's good, because it means that the call to open(2) is no longer invoking undefined behaviour. (Also, the previous version of audiodb_open() leaked an fd). Unfortunately, that means we have broken ABI and API compatibility. (Fortunately, we have fewer than 12 users). Use audiodb_open() in audioDB::initDBHeader(). We've temporarily(?) put acquire_lock(int, bool) in the API header; that means we need to include <stdbool.h> and compile C files with -std=c99. Do so. Make audiodb_close() free resources allocated by audiodb_open(). Include a struct adb * field in the audioDB C++ object... ... which lets us actually implement memory-correctness, by audiodb_close()ing the database in audioDB::cleanup(). [ The lock is, I think, correctly disposed of; man fcntl(2) on Linux says that the locks are released once any file descriptor relating to the file is closed, and we close the fd in audiodb_close(). ]
author mas01cr
date Mon, 24 Nov 2008 15:42:15 +0000
parents f20571eeb9a6
children a82a2d9b2451
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
/* Make a new database.

IF ntracks < O2_LARGE_ADB_NTRACKS and datasize < O2_LARGE_ADB_SIZE
   The database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks;
   * trackTable: Maps implicit feature index to a feature vector
     matrix (sizes of tracks)
   * featureTable: Lots of doubles;
   * timesTable: (start,end) time points for each feature vector;
   * powerTable: associated power for each feature vector;
   * l2normTable: squared l2norms for each feature vector.
   
ELSE the database consists of:
   
   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks
   * trackTable: sizes of tracks
   * featureTable: list of feature file names
   * timesTable: list of times file names
   * powerTable: list of power file names

*/

extern "C" {
  /* Create a new, empty database file at `path` and return it opened
     read-write.

     path     - file to create; it is opened with O_CREAT|O_EXCL, so
                creation fails if the file already exists.
     datasize - size of the feature data region in megabytes
                (0 selects O2_DEFAULT_DATASIZE).
     ntracks  - number of track slots to reserve
                (0 selects O2_DEFAULT_NTRACKS).
     datadim  - feature dimension used to size the auxiliary tables
                (0 selects O2_DEFAULT_DATADIM).

     Returns the result of audiodb_open(path, O_RDWR) on success, or
     NULL if any step of the creation fails.

     NOTE(review): when a step after open() fails, the partially
     written file is left on disk; because of O_EXCL a retry on the
     same path will fail until the file is removed. */
  adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
    int fd = -1;
    adb_header_t *header = 0;
    off_t databytes, auxbytes;
    if(datasize == 0) {
      datasize = O2_DEFAULT_DATASIZE;
    }
    if(ntracks == 0) {
      ntracks = O2_DEFAULT_NTRACKS;
    }
    if(datadim == 0) {
      datadim = O2_DEFAULT_DATADIM;
    }

    if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
      goto error;
    }
    if (acquire_lock(fd, true)) {
      goto error;
    }

    // Zero-filled allocation: the header is written to disk verbatim,
    // so padding and any member not assigned below must not contain
    // indeterminate heap contents.
    header = (adb_header_t *) calloc(1, sizeof(adb_header_t));
    if(!header) {
      goto error;
    }
    
    // Initialize header
    header->magic = O2_MAGIC;
    header->version = O2_FORMAT_VERSION;
    header->numFiles = 0;
    header->dim = 0;
    header->flags = 0;
    header->headerSize = O2_HEADERSIZE;
    header->length = 0;
    header->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
    header->trackTableOffset = ALIGN_PAGE_UP(header->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->dataOffset = ALIGN_PAGE_UP(header->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);
    
    databytes = ((off_t) datasize) * 1024 * 1024;
    auxbytes = databytes / datadim;

    /* FIXME: what's going on here?  There are two distinct
       preprocessor constants (O2_LSH_N_POINT_BITS, LSH_N_POINT_BITS);
       a third is presumably some default
       (O2_DEFAULT_LSH_N_POINT_BITS), and then there's this magic 28
       bits.  Should this really be part of the flags structure at
       all?  Putting it elsewhere will of course break backwards
       compatibility, unless 14 is the only value that's been used
       anywhere... */
    
    // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
    // If this value is 0 then it will be set to 14
    
#if O2_LSH_N_POINT_BITS > 15
#error "AudioDB Compile ERROR: consistency check of O2_LSH_POINT_BITS failed (>15)"
#endif
    
    // Shift in unsigned arithmetic: a value of 8 or more shifted left
    // by 28 does not fit in a signed int.
    header->flags |= ((unsigned) LSH_N_POINT_BITS) << 28;
    
    // If database will fit in a single file the vectors are copied into the AudioDB instance
    // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
    // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
    if(ntracks<O2_LARGE_ADB_NTRACKS && datasize<O2_LARGE_ADB_SIZE){
      header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + databytes);
      header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + 2*auxbytes);
      header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + auxbytes);
      header->dbSize = ALIGN_PAGE_UP(header->l2normTableOffset + auxbytes);
    } else { // Create LARGE_ADB, features and powers kept on filesystem 
      header->flags |= O2_FLAG_LARGE_ADB;
      header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
      header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
      header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
      header->dbSize = header->l2normTableOffset;
    } 
    
    // NOTE(review): this writes O2_HEADERSIZE bytes from a buffer of
    // sizeof(adb_header_t) bytes; presumably the two are equal -- confirm.
    if (write(fd, header, O2_HEADERSIZE) != O2_HEADERSIZE) {
      goto error;
    }
    
    // go to the location corresponding to the last byte
    if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
      goto error;
    }

    // write a dummy byte at the last location
    if (write (fd, "", 1) != 1) {
      goto error;
    }

    free(header);
    header = 0;

    // Release the creation descriptor *before* re-opening the file.
    // Leaving it open leaks an fd per call.  Closing it after
    // audiodb_open() would be worse if acquire_lock() uses fcntl(2)
    // record locks: closing any descriptor for a file drops all of the
    // process's locks on it, including the one held by the new handle.
    if (close(fd) != 0) {
      fd = -1;
      goto error;
    }
    fd = -1;

    return audiodb_open(path, O_RDWR);

  error:
    if(fd >= 0) {
      close(fd);
    }
    free(header);
    return NULL;
  }
}