Mercurial > hg > audiodb

#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"

/*
 * Report whether `size` additional bytes of feature data fit into the
 * database.  Databases flagged O2_FLAG_LARGE_ADB always answer true;
 * for all others the new end of data must stay strictly below
 * timesTableOffset.
 *
 * FIXME: timesTableOffset isn't necessarily the next biggest offset
 * after dataOffset.  Maybe make the offsets into an array that we can
 * iterate over...
 */
static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
  const adb_header_t *hdr = adb->header;

  if(hdr->flags & O2_FLAG_LARGE_ADB) {
    return true;
  }
  return (hdr->timesTableOffset >
          (hdr->dataOffset + hdr->length + size));
}

/*
 * Report whether the per-file tables have a free slot for one more
 * entry.  The capacity is the smallest slot count over every table
 * that holds one entry per file; numFiles must be strictly below it.
 *
 * FIXME: this relies on the tables being laid out in the order
 * fileTable, trackTable, data, times, power, which is not guaranteed.
 */
static bool audiodb_enough_per_file_space_free(adb_t *adb) {
  adb_header_t *hdr = adb->header;

  off_t key_table_bytes = hdr->trackTableOffset - hdr->fileTableOffset;
  off_t track_table_bytes = hdr->dataOffset - hdr->trackTableOffset;
  int key_slots = key_table_bytes / O2_FILETABLE_ENTRY_SIZE;
  int track_slots = track_table_bytes / O2_TRACKTABLE_ENTRY_SIZE;

  /* Start from the key table and shrink to the tightest table. */
  int capacity = key_slots;
  if(track_slots < capacity) {
    capacity = track_slots;
  }

  if(hdr->flags & O2_FLAG_LARGE_ADB) {
    /* In this layout the data, times and power regions hold one
     * fixed-size entry per file.  By default they are created with the
     * same size as the fileTable (really a key table), but rather than
     * trust that we measure each of them. */
    off_t data_bytes = hdr->timesTableOffset - hdr->dataOffset;
    off_t times_bytes = hdr->powerTableOffset - hdr->timesTableOffset;
    off_t power_bytes = hdr->dbSize - hdr->powerTableOffset;
    int data_slots = data_bytes / O2_FILETABLE_ENTRY_SIZE;
    int times_slots = times_bytes / O2_FILETABLE_ENTRY_SIZE;
    int power_slots = power_bytes / O2_FILETABLE_ENTRY_SIZE;

    if(data_slots < capacity) {
      capacity = data_slots;
    }
    if(times_slots < capacity) {
      capacity = times_slots;
    }
    if(power_slots < capacity) {
      capacity = power_slots;
    }
  }
  return (hdr->numFiles < (unsigned int) capacity);
}

/*
 * Hey, look, a comment.  Normally I wouldn't bother, as the code
 * should be self-documenting, but a lot of logic is concentrated in
 * this one place, so let's give an overview beforehand.  To insert a
 * datum into the database, we:
 *
 *  1. check write permission;
 *  2. check for enough space;
 *  3. check that datum->dim and adb->header->dim agree (or that the
 *     header dimension is zero, in which case write datum->dim to
 *     adb->header->dim).
 *  4. check for presence of datum->key in adb->keys;
 *  5. check for consistency between power and O2_FLAG_POWER, and
 *     times and O2_FLAG_TIMES;
 *  6. write in data, power, times as appropriate; add to track
 *     and key tables too;
 *  7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
 *     in table;
 *  8. update adb->keys and adb->header;
 *  9. sync adb->header with disk.
 *
 * Step 9 essentially commits the transaction; until we update
 * header->length, nothing will recognize the newly-written data.  In
 * principle, if it fails, we should roll back, which we can in fact
 * do on the assumption that nothing in step 8 can ever fail; on the
 * other hand, if it's failed, then it's unlikely that rolling back by
 * syncing the original header back to disk is going to work
 * desperately well.  We should perhaps take an operating-system lock
 * around step 9, so that we can't be interrupted part-way through
 * (except of course for SIGKILL, but if we're hit with that we will
 * always lose).
 */
static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {

  off_t size, offset, nfiles;
  double *l2norm_buffer, *lp, *dp;

  /* 1. check write permission; */
  if(!(adb->flags & O_RDWR)) {
    return 1;
  }
  /* 2. check for enough space; */
  size = sizeof(double) * datum->nvectors * datum->dim;
  if(!audiodb_enough_data_space_free(adb, size)) {
    return 1;
  }
  if(!audiodb_enough_per_file_space_free(adb)) {
    return 1;
  }
  /* 3. check that datum->dim and adb->header->dim agree (or that the
   *    header dimension is zero, in which case write datum->dim to
   *    adb->header->dim).
   */
  if(adb->header->dim == 0) {
    adb->header->dim = datum->dim;
  } else if (adb->header->dim != datum->dim) {
    return 1;
  }
  /* 4. check for presence of datum->key in adb->keys; */
  if(adb->keys->count(datum->key)) {
    /* not part of an explicit API/ABI, but we need a distinguished
       value in this circumstance to preserve somewhat wonky behaviour
       of audioDB::batchinsert. */
    return 2;
  }
  /* 5. check for consistency between power and O2_FLAG_POWER, and
   *    times and O2_FLAG_TIMES;
   */
  if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
     ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) {
    return 1;
  }
  if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) {
    if(adb->header->numFiles == 0) {
      adb->header->flags |= O2_FLAG_TIMES;
    } else {
      return 1;
    }
  } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
    return 1;
  }
  /* 6. write in data, power, times as appropriate; add to track
   *    and key tables too;
   */
  offset = adb->header->length;
  nfiles = adb->header->numFiles;

  /* FIXME: checking for all these lseek()s and write()s */
  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
  write(adb->fd, datum->key, strlen(datum->key)+1);
  lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
  write(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    char cwd[PATH_MAX];
    char slash = '/';

    getcwd(cwd, PATH_MAX);
    lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
    if(*((char *) datum->data) != '/') {
      write(adb->fd, cwd, strlen(cwd));
      write(adb->fd, &slash, 1);
    }
    write(adb->fd, datum->data, strlen((const char *) datum->data)+1);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->power) != '/') {
        write(adb->fd, cwd, strlen(cwd));
        write(adb->fd, &slash, 1);
      }
      write(adb->fd, datum->power, strlen((const char *) datum->power)+1);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->times) != '/') {
        write(adb->fd, cwd, strlen(cwd));
        write(adb->fd, &slash, 1);
      }
      write(adb->fd, datum->times, strlen((const char *) datum->times)+1);
    }
  } else {
    lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
    write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
      write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
      write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
    }
  }

  /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
   *    in table;
   */
  if((adb->header->flags & O2_FLAG_L2NORM) &&
     !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
    l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));

    /* FIXME: shared code with audiodb_norm_existing() */
    dp = (double *) datum->data;
    lp = l2norm_buffer;
    for(size_t i = 0; i < datum->nvectors; i++) {
      *lp = 0;
      for(unsigned int k = 0; k < datum->dim; k++) {
        *lp += (*dp)*(*dp);
        dp++;
      }
      lp++;
    }
    lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
    write(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
    free(l2norm_buffer);
  }

  /* 8. update adb->keys and adb->header; */
  adb->keys->insert(datum->key);
  adb->header->numFiles += 1;
  adb->header->length += sizeof(double) * datum->nvectors * datum->dim;

  /* 9. sync adb->header with disk. */
  return audiodb_sync_header(adb);

 error:
  return 1;
}

/*
 * Insert `datum` into `adb`: copy its fields into the internal datum
 * representation and pass that to audiodb_insert_datum_internal(),
 * whose return value is handed back unchanged.
 */
int audiodb_insert_datum(adb_t *adb, adb_datum_t *datum) {
  adb_datum_internal_t internal;

  internal.key = datum->key;
  internal.dim = datum->dim;
  internal.nvectors = datum->nvectors;
  internal.data = datum->data;
  internal.power = datum->power;
  internal.times = datum->times;

  return audiodb_insert_datum_internal(adb, &internal);
}

/*
 * Release the data, power and times buffers owned by `datum`.  The
 * datum structure itself is not freed and its pointers are left as
 * they were.  Always returns 0.
 */
static int audiodb_free_datum(adb_datum_t *datum) {
  /* free(NULL) is a no-op, so unset members need no special casing */
  free(datum->data);
  free(datum->power);
  free(datum->times);
  return 0;
}

/*
 * Read exactly `count` bytes from `fd` into `buf`, retrying after
 * short reads.  Returns 0 on success, 1 on a read error or if end of
 * file arrives first.
 */
static int audiodb_insert_read_exact(int fd, void *buf, size_t count) {
  char *p = (char *) buf;
  while(count > 0) {
    ssize_t got = read(fd, p, count);
    if(got <= 0) {
      return 1;
    }
    p += got;
    count -= (size_t) got;
  }
  return 0;
}

/*
 * Build an in-memory datum from the files named in `insert`.
 *
 *  - insert->features: a uint32_t dimension followed by raw doubles;
 *    fills datum->dim, datum->nvectors and datum->data.
 *  - insert->power (optional): a uint32_t that must equal 1, followed
 *    by one double per vector; fills datum->power.
 *  - insert->times (optional): text file of nvectors + 1 numbers;
 *    datum->times receives 2 * nvectors doubles, each interior number
 *    being stored twice (once as the end of one pair and once as the
 *    start of the next).
 *
 * datum->key is insert->key if set, otherwise insert->features.
 *
 * Returns 0 on success; the caller then owns the buffers and releases
 * them with audiodb_free_datum().  Returns 1 on any failure, with all
 * descriptors closed, all buffers freed and the datum's buffer
 * pointers reset to NULL.
 */
static int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) {
  int fd = -1; /* -1 means "no descriptor open"; 0 is a valid fd */
  FILE *file = NULL;
  struct stat st;
  off_t size;

  datum->data = NULL;
  datum->power = NULL;
  datum->times = NULL;
  if((fd = open(insert->features, O_RDONLY)) == -1) {
    goto error;
  }
  if(fstat(fd, &st)) {
    goto error;
  }
  /* the file must at least hold the dimension word */
  if(st.st_size < (off_t) sizeof(uint32_t)) {
    goto error;
  }
  if(audiodb_insert_read_exact(fd, &(datum->dim), sizeof(uint32_t))) {
    goto error;
  }
  /* dim is used as a divisor throughout */
  if(datum->dim == 0) {
    goto error;
  }
  size = st.st_size - sizeof(uint32_t);
  datum->nvectors = size / (sizeof(double) * datum->dim);
  datum->data = (double *) malloc(size);
  if(!datum->data) {
    goto error;
  }
  if(audiodb_insert_read_exact(fd, datum->data, size)) {
    goto error;
  }
  close(fd);
  fd = -1;
  if(insert->power) {
    uint32_t power_dim = 0;
    if((fd = open(insert->power, O_RDONLY)) == -1) {
      goto error;
    }
    if(fstat(fd, &st)) {
      goto error;
    }
    /* payload must be exactly one double per feature vector */
    if((st.st_size - (off_t) sizeof(uint32_t)) != (size / datum->dim)) {
      goto error;
    }
    if(audiodb_insert_read_exact(fd, &power_dim, sizeof(uint32_t))) {
      goto error;
    }
    if(power_dim != 1) {
      goto error;
    }
    datum->power = (double *) malloc(size / datum->dim);
    if(!datum->power) {
      goto error;
    }
    if(audiodb_insert_read_exact(fd, datum->power, size / datum->dim)) {
      goto error;
    }
    close(fd);
    /* reset so that a later failure does not close it a second time */
    fd = -1;
  }
  if(insert->times) {
    double t, *tp;
    /* with no vectors there is no room for even the first timestamp,
     * and nvectors - 1 below would wrap around */
    if(datum->nvectors == 0) {
      goto error;
    }
    if(!(file = fopen(insert->times, "r"))) {
      goto error;
    }
    datum->times = (double *) malloc(2 * size / datum->dim);
    if(!datum->times) {
      goto error;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    tp = datum->times;
    *tp++ = t;
    for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
      if(fscanf(file, " %lf", &t) != 1) {
        goto error;
      }
      /* an interior timestamp ends one pair and starts the next */
      *tp++ = t;
      *tp++ = t;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    *tp = t;
    fclose(file);
    file = NULL;
  }
  datum->key = insert->key ? insert->key : insert->features;
  return 0;

 error:
  if(fd != -1) {
    close(fd);
  }
  if(file) {
    fclose(file);
  }
  audiodb_free_datum(datum);
  /* do not hand freed pointers back to the caller */
  datum->data = NULL;
  datum->power = NULL;
  datum->times = NULL;
  return 1;
}

/*
 * Insert the track described by `insert` into `adb`.
 *
 * For an O2_FLAG_LARGE_ADB database only file names are stored: the
 * features file is opened just to read its dimension word and size,
 * the optional power and times files are only checked with stat(),
 * and the names themselves are passed on as the datum's data, power
 * and times.  Otherwise the files are loaded into memory by
 * audiodb_insert_create_datum() and inserted via
 * audiodb_insert_datum().
 *
 * Returns 0 on success and also when the key is already present (the
 * internal result 2 is mapped to 0); any other non-zero result is
 * returned as is.
 */
int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
  int err;

  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    adb_datum_internal_t d;
    struct stat st;
    int fd;
    off_t size;
    ssize_t got;

    if((fd = open(insert->features, O_RDONLY)) == -1) {
      return 1;
    }
    if(fstat(fd, &st)) {
      close(fd);
      return 1;
    }
    got = read(fd, &(d.dim), sizeof(uint32_t));
    close(fd);
    /* without a complete, non-zero dimension word the vector count
     * below cannot be computed */
    if(got != (ssize_t) sizeof(uint32_t) || d.dim == 0) {
      return 1;
    }
    size = st.st_size - sizeof(uint32_t);
    d.nvectors = size / (sizeof(double) * d.dim);
    d.data = (void *) insert->features;
    if(insert->power) {
      if(stat(insert->power, &st)) {
        return 1;
      }
    }
    d.power = (void *) insert->power;
    if(insert->times) {
      if(stat(insert->times, &st)) {
        return 1;
      }
    }
    d.times = (void *) insert->times;
    d.key = insert->key ? insert->key : insert->features;
    err = audiodb_insert_datum_internal(adb, &d);
  } else {
    adb_datum_t datum;

    if(audiodb_insert_create_datum(insert, &datum)) {
      return 1;
    }
    err = audiodb_insert_datum(adb, &datum);
    audiodb_free_datum(&datum);
  }

  /* 2 is the internal "key already present" result, which this entry
   * point reports as success */
  return (err == 2) ? 0 : err;
}

/*
 * Insert the `size` entries of the `insert` array in order, stopping
 * at the first one for which audiodb_insert() returns non-zero and
 * returning that value.  Returns 0 if every insertion succeeded (or
 * if `size` is 0).
 */
int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
  unsigned int done = 0;

  while(done < size) {
    int status = audiodb_insert(adb, insert + done);
    if(status != 0) {
      return status;
    }
    done++;
  }
  return 0;
}
author	mas01cr
date	Tue, 09 Dec 2008 22:48:30 +0000
parents	f0a69693eaef
children	d7e590d58c85