view insert.cpp @ 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents f0a69693eaef
children d7e590d58c85
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"

/*
 * Report whether `size` additional bytes of feature data fit into
 * the data region of `adb`.
 *
 * Databases flagged O2_FLAG_LARGE_ADB always report true.  For all
 * others the end of the data after the insertion
 * (dataOffset + length + size) must stay strictly below
 * timesTableOffset.
 */
static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
  adb_header_t *hdr = adb->header;

  if(hdr->flags & O2_FLAG_LARGE_ADB) {
    return true;
  }

  /* FIXME: timesTableOffset isn't necessarily the next biggest
   * offset after dataOffset.  Maybe make the offsets into an array
   * that we can iterate over... */
  return hdr->timesTableOffset > (hdr->dataOffset + hdr->length + size);
}

/*
 * Report whether every per-file table of `adb` still has room for one
 * more entry.
 *
 * The capacity of each table is its byte length divided by its entry
 * size; the database can hold as many files as the smallest table
 * allows.  Returns true when header->numFiles is below that limit.
 */
static bool audiodb_enough_per_file_space_free(adb_t *adb) {
  /* FIXME: the table lengths are computed as differences between
     consecutive offsets, which assumes a particular ordering of the
     tables within the file. */
  adb_header_t *hdr = adb->header;
  off_t key_table_bytes = hdr->trackTableOffset - hdr->fileTableOffset;
  off_t track_table_bytes = hdr->dataOffset - hdr->trackTableOffset;
  int key_slots = key_table_bytes / O2_FILETABLE_ENTRY_SIZE;
  int track_slots = track_table_bytes / O2_TRACKTABLE_ENTRY_SIZE;

  /* capacity is the _minimum_ over all the tables considered. */
  int capacity = key_slots;
  if(capacity > track_slots) {
    capacity = track_slots;
  }

  if(hdr->flags & O2_FLAG_LARGE_ADB) {
    /* by default, these tables are created with the same size as the
     * fileTable (which should be called key_table); rather than rely
     * on that always being the case, measure each of them. */
    off_t data_table_bytes = hdr->timesTableOffset - hdr->dataOffset;
    off_t times_table_bytes = hdr->powerTableOffset - hdr->timesTableOffset;
    off_t power_table_bytes = hdr->dbSize - hdr->powerTableOffset;
    int data_slots = data_table_bytes / O2_FILETABLE_ENTRY_SIZE;
    int times_slots = times_table_bytes / O2_FILETABLE_ENTRY_SIZE;
    int power_slots = power_table_bytes / O2_FILETABLE_ENTRY_SIZE;

    if(capacity > data_slots) {
      capacity = data_slots;
    }
    if(capacity > times_slots) {
      capacity = times_slots;
    }
    if(capacity > power_slots) {
      capacity = power_slots;
    }
  }

  return hdr->numFiles < (unsigned int) capacity;
}

/*
 * Hey, look, a comment.  Normally I wouldn't bother, as the code
 * should be self-documenting, but a lot of logic is concentrated in
 * this one place, so let's give an overview beforehand.  To insert a
 * datum into the database, we:
 *
 *  1. check write permission;
 *  2. check for enough space;
 *  3. check that datum->dim and adb->header->dim agree (or that the
 *     header dimension is zero, in which case write datum->dim to
 *     adb->header->dim).
 *  4. check for presence of datum->key in adb->keys;
 *  5. check for consistency between power and O2_FLAG_POWER, and 
 *     times and O2_FLAG_TIMES;
 *  6. write in data, power, times as appropriate; add to track
 *     and key tables too;
 *  7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
 *     in table;
 *  8. update adb->keys and adb->header;
 *  9. sync adb->header with disk.
 *
 * Step 9 essentially commits the transaction; until we update
 * header->length, nothing will recognize the newly-written data.  In
 * principle, if it fails, we should roll back, which we can in fact
 * do on the assumption that nothing in step 8 can ever fail; on the
 * other hand, if it's failed, then it's unlikely that rolling back by
 * syncing the original header back to disk is going to work
 * desperately well.  We should perhaps take an operating-system lock
 * around step 9, so that we can't be interrupted part-way through
 * (except of course for SIGKILL, but if we're hit with that we will
 * always lose).
 */
static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {

  off_t size, offset, nfiles;
  double *l2norm_buffer, *lp, *dp;

  /* 1. check write permission; */
  if(!(adb->flags & O_RDWR)) {
    return 1;
  }
  /* 2. check for enough space; */
  size = sizeof(double) * datum->nvectors * datum->dim;
  if(!audiodb_enough_data_space_free(adb, size)) {
    return 1;
  }
  if(!audiodb_enough_per_file_space_free(adb)) {
    return 1;
  }
  /* 3. check that datum->dim and adb->header->dim agree (or that the
   *    header dimension is zero, in which case write datum->dim to
   *    adb->header->dim).
   */
  if(adb->header->dim == 0) {
    adb->header->dim = datum->dim;
  } else if (adb->header->dim != datum->dim) {
    return 1;
  }
  /* 4. check for presence of datum->key in adb->keys; */
  if(adb->keys->count(datum->key)) {
    /* not part of an explicit API/ABI, but we need a distinguished
       value in this circumstance to preserve somewhat wonky behaviour
       of audioDB::batchinsert. */
    return 2;
  }
  /* 5. check for consistency between power and O2_FLAG_POWER, and
   *    times and O2_FLAG_TIMES; 
   */
  if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
     ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) {
    return 1;
  }
  if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) {
    if(adb->header->numFiles == 0) {
      adb->header->flags |= O2_FLAG_TIMES;
    } else {
      return 1;
    }
  } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
    return 1;
  }
  /* 6. write in data, power, times as appropriate; add to track
   *    and key tables too;
   */
  offset = adb->header->length;
  nfiles = adb->header->numFiles;

  /* FIXME: checking for all these lseek()s and write()s */
  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
  write(adb->fd, datum->key, strlen(datum->key)+1);
  lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
  write(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    char cwd[PATH_MAX];
    char slash = '/';

    getcwd(cwd, PATH_MAX);
    lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
    if(*((char *) datum->data) != '/') {
      write(adb->fd, cwd, strlen(cwd));
      write(adb->fd, &slash, 1);
    }
    write(adb->fd, datum->data, strlen((const char *) datum->data)+1);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->power) != '/') {
        write(adb->fd, cwd, strlen(cwd));
        write(adb->fd, &slash, 1);
      }
      write(adb->fd, datum->power, strlen((const char *) datum->power)+1);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->times) != '/') {
        write(adb->fd, cwd, strlen(cwd));
        write(adb->fd, &slash, 1);
      }
      write(adb->fd, datum->times, strlen((const char *) datum->times)+1);
    }
  } else {
    lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
    write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
      write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
      write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
    }
  }

  /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
   *    in table;
   */
  if((adb->header->flags & O2_FLAG_L2NORM) &&
     !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
    l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
    
    /* FIXME: shared code with audiodb_norm_existing() */
    dp = (double *) datum->data;
    lp = l2norm_buffer;
    for(size_t i = 0; i < datum->nvectors; i++) {
      *lp = 0;
      for(unsigned int k = 0; k < datum->dim; k++) {
        *lp += (*dp)*(*dp);
        dp++;
      }
      lp++;
    }
    lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
    write(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
    free(l2norm_buffer);
  }

  /* 8. update adb->keys and adb->header; */
  adb->keys->insert(datum->key);
  adb->header->numFiles += 1;
  adb->header->length += sizeof(double) * datum->nvectors * datum->dim;

  /* 9. sync adb->header with disk. */
  return audiodb_sync_header(adb);

 error:
  return 1;
}

/*
 * Insert `datum` into `adb`.
 *
 * Copies each field of the adb_datum_t into an adb_datum_internal_t
 * and hands that to audiodb_insert_datum_internal(), whose return
 * value is passed back unchanged.
 */
int audiodb_insert_datum(adb_t *adb, adb_datum_t *datum) {
  adb_datum_internal_t internal;

  internal.key = datum->key;
  internal.dim = datum->dim;
  internal.nvectors = datum->nvectors;
  internal.data = datum->data;
  internal.power = datum->power;
  internal.times = datum->times;

  return audiodb_insert_datum_internal(adb, &internal);
}

/*
 * Release the data, power and times buffers owned by `datum` and
 * reset the three pointers to NULL, so that a second call (or a later
 * cleanup path) cannot free the same memory twice.  The datum
 * structure itself and its key are not freed.  Always returns 0.
 */
static int audiodb_free_datum(adb_datum_t *datum) {
  /* free(NULL) is a no-op, so no guards are needed. */
  free(datum->data);
  datum->data = NULL;
  free(datum->power);
  datum->power = NULL;
  free(datum->times);
  datum->times = NULL;
  return 0;
}

/*
 * Read exactly `count` bytes from `fd` into `buf`, continuing after
 * short reads.  Returns 0 on success, 1 on error or premature end of
 * file.
 */
static int audiodb_insert_read_all(int fd, void *buf, size_t count) {
  char *p = (char *) buf;
  while(count > 0) {
    ssize_t got = read(fd, p, count);
    if(got <= 0) {
      return 1;
    }
    p += got;
    count -= (size_t) got;
  }
  return 0;
}

/*
 * Build `datum` in memory from the files named in `insert`.
 *
 *  - insert->features: a uint32_t dimension followed by doubles; the
 *    doubles become datum->data and the vector count is derived from
 *    the file size.
 *  - insert->power (optional): a uint32_t that must equal 1, followed
 *    by exactly one double per vector.
 *  - insert->times (optional): text file of whitespace-separated
 *    numbers; nvectors + 1 values are read and expanded into
 *    2 * nvectors doubles, every value except the first and the last
 *    being stored twice.
 *
 * datum->key is insert->key, or insert->features when no key is
 * given; it is not copied.
 *
 * Returns 0 on success, in which case the caller owns the buffers in
 * datum (release them with audiodb_free_datum()).  Returns 1 on any
 * failure, with all buffers released and the pointers set to NULL.
 */
static int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) {
  int fd = -1;
  FILE *file = NULL;
  struct stat st;
  off_t size;

  datum->data = NULL;
  datum->power = NULL;
  datum->times = NULL;
  if((fd = open(insert->features, O_RDONLY)) == -1) {
    goto error;
  }
  if(fstat(fd, &st)) {
    goto error;
  }
  if(st.st_size < (off_t) sizeof(uint32_t)) {
    /* too short to even hold the dimension */
    goto error;
  }
  if(audiodb_insert_read_all(fd, &(datum->dim), sizeof(uint32_t))) {
    goto error;
  }
  if(datum->dim == 0) {
    /* every size computation below divides by the dimension */
    goto error;
  }
  size = st.st_size - sizeof(uint32_t);
  datum->nvectors = size / (sizeof(double) * datum->dim);
  datum->data = (double *) malloc(size);
  if(!datum->data) {
    goto error;
  }
  if(audiodb_insert_read_all(fd, datum->data, size)) {
    goto error;
  }
  close(fd);
  fd = -1;
  if(insert->power) {
    uint32_t power_dim;
    off_t power_bytes = size / datum->dim;

    if((fd = open(insert->power, O_RDONLY)) == -1) {
      goto error;
    }
    if(fstat(fd, &st)) {
      goto error;
    }
    if(st.st_size != power_bytes + (off_t) sizeof(uint32_t)) {
      goto error;
    }
    if(audiodb_insert_read_all(fd, &power_dim, sizeof(uint32_t))) {
      goto error;
    }
    if(power_dim != 1) {
      goto error;
    }
    datum->power = (double *) malloc(power_bytes);
    if(!datum->power) {
      goto error;
    }
    if(audiodb_insert_read_all(fd, datum->power, power_bytes)) {
      goto error;
    }
    close(fd);
    /* mark the descriptor as closed so the error path below cannot
     * close it (or whatever has reused its number) a second time */
    fd = -1;
  }
  if(insert->times) {
    double t, *tp;

    if(datum->nvectors == 0) {
      /* nvectors - 1 below would wrap around and the loop would run
       * off the end of the times buffer */
      goto error;
    }
    if(!(file = fopen(insert->times, "r"))) {
      goto error;
    }
    datum->times = (double *) malloc(2 * size / datum->dim);
    if(!datum->times) {
      goto error;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    tp = datum->times;
    *tp++ = t;
    for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
      if(fscanf(file, " %lf", &t) != 1) {
        goto error;
      }
      /* stored twice: once to finish the current pair and once to
       * begin the next */
      *tp++ = t;
      *tp++ = t;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    *tp = t;
    fclose(file);
    file = NULL;
  }
  datum->key = insert->key ? insert->key : insert->features;
  return 0;

 error:
  if(fd != -1) {
    close(fd);
  }
  if(file) {
    fclose(file);
  }
  audiodb_free_datum(datum);
  /* do not hand freed pointers back to the caller */
  datum->data = NULL;
  datum->power = NULL;
  datum->times = NULL;
  return 1;
}

/*
 * Insert the features (and optional power and times) named by
 * `insert` into `adb`.
 *
 * For O2_FLAG_LARGE_ADB databases only the dimension is read from the
 * features file; the vector count is derived from the file size, and
 * the file names themselves are passed on as the datum's data, power
 * and times.  The power and times files are merely checked with
 * stat().  For other databases the files are loaded into memory by
 * audiodb_insert_create_datum() and inserted with
 * audiodb_insert_datum().
 *
 * Returns 0 on success, and also when the key is already present
 * (the insertion routine's distinguished value 2 is mapped to 0);
 * any other non-zero result is returned as is.
 */
int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
  int err;

  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    adb_datum_internal_t d;
    struct stat st;
    int fd;
    ssize_t got;
    off_t size;

    if((fd = open(insert->features, O_RDONLY)) == -1) {
      return 1;
    }
    if(fstat(fd, &st)) {
      close(fd);
      return 1;
    }
    got = read(fd, &(d.dim), sizeof(uint32_t));
    close(fd);
    /* a short read leaves d.dim unset, and a zero dimension would
     * make the division below fail */
    if(got != (ssize_t) sizeof(uint32_t) || d.dim == 0) {
      return 1;
    }
    size = st.st_size - sizeof(uint32_t);
    d.nvectors = size / (sizeof(double) * d.dim);
    d.data = (void *) insert->features;
    if(insert->power) {
      if(stat(insert->power, &st)) {
        return 1;
      }
    }
    d.power = (void *) insert->power;
    if(insert->times) {
      if(stat(insert->times, &st)) {
        return 1;
      }
    }
    d.times = (void *) insert->times;
    d.key = insert->key ? insert->key : insert->features;
    err = audiodb_insert_datum_internal(adb, &d);
  } else {
    adb_datum_t datum;

    if(audiodb_insert_create_datum(insert, &datum)) {
      return 1;
    }
    err = audiodb_insert_datum(adb, &datum);
    audiodb_free_datum(&datum);
  }

  /* an already-present key is not an error at this level */
  return (err == 2) ? 0 : err;
}

/*
 * Insert the `size` entries of the array `insert` into `adb`, in
 * order, by calling audiodb_insert() on each.  Stops at the first
 * entry for which audiodb_insert() returns non-zero and returns that
 * value; returns 0 when every entry was accepted.
 */
int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
  unsigned int index = 0;

  while(index < size) {
    int status = audiodb_insert(adb, &(insert[index]));
    if(status) {
      return status;
    }
    index++;
  }
  return 0;
}