view insert.cpp @ 410:d7e590d58c85 api-inversion

Pavlovian response to compiler warnings... ... attempt to squash them. For now we can get most of the way by writing a simple write_or_goto_error() macro for write(), and the equivalent for read(). One of the warnings, for the return value of chdir(), is silly, because we're already in an error case, and we really can't do anything sensible if the chdir fails. Try to deal with it anyway.
author mas01cr
date Thu, 11 Dec 2008 08:54:01 +0000
parents 99e6cbad7f76
children 223eda8408e1
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"

static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
  adb_header_t *header = adb->header;
  if(header->flags & O2_FLAG_LARGE_ADB) {
    return true;
  } else {
    /* FIXME: timesTableOffset isn't necessarily the next biggest
     * offset after dataOffset.  Maybe make the offsets into an array
     * that we can iterate over... */
    return (header->timesTableOffset > 
            (header->dataOffset + header->length + size));
  }
}

static bool audiodb_enough_per_file_space_free(adb_t *adb) {
  /* FIXME: the comment above about the ordering of the tables applies
     here too. */
  adb_header_t *header = adb->header;
  off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
  off_t track_table_length = header->dataOffset - header->trackTableOffset;
  int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE;
  int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE;
  /* maxfiles is the _minimum_ of the two.  Do not be confused... */
  int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
  if(header->flags & O2_FLAG_LARGE_ADB) {
    /* by default, these tables are created with the same size as the
     * fileTable (which should be called key_table); relying on that
     * always being the case, though, smacks of optimism, so instead
     * we code defensively... */
    off_t data_table_length = header->timesTableOffset - header->dataOffset;
    off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
    off_t power_table_length = header->dbSize - header->powerTableOffset;
    int dmaxfiles = data_table_length / O2_FILETABLE_ENTRY_SIZE;
    int timaxfiles = times_table_length / O2_FILETABLE_ENTRY_SIZE;
    int pmaxfiles = power_table_length / O2_FILETABLE_ENTRY_SIZE;
    /* ... even though it means a certain amount of tedium. */
    maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
    maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
    maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
  }
  return (header->numFiles < (unsigned int) maxfiles);
}

/*
 * Hey, look, a comment.  Normally I wouldn't bother, as the code
 * should be self-documenting, but a lot of logic is concentrated in
 * this one place, so let's give an overview beforehand.  To insert a
 * datum into the database, we:
 *
 *  1. check write permission;
 *  2. check for enough space;
 *  3. check that datum->dim and adb->header->dim agree (or that the
 *     header dimension is zero, in which case write datum->dim to
 *     adb->header->dim).
 *  4. check for presence of datum->key in adb->keys;
 *  5. check for consistency between power and O2_FLAG_POWER, and 
 *     times and O2_FLAG_TIMES;
 *  6. write in data, power, times as appropriate; add to track
 *     and key tables too;
 *  7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
 *     in table;
 *  8. update adb->keys and adb->header;
 *  9. sync adb->header with disk.
 *
 * Step 9 essentially commits the transaction; until we update
 * header->length, nothing will recognize the newly-written data.  In
 * principle, if it fails, we should roll back, which we can in fact
 * do on the assumption that nothing in step 8 can ever fail; on the
 * other hand, if it's failed, then it's unlikely that rolling back by
 * syncing the original header back to disk is going to work
 * desperately well.  We should perhaps take an operating-system lock
 * around step 9, so that we can't be interrupted part-way through
 * (except of course for SIGKILL, but if we're hit with that we will
 * always lose).
 */
static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {

  off_t size, offset, nfiles;
  double *l2norm_buffer = NULL, *lp, *dp;  

  /* 1. check write permission; */
  if(!(adb->flags & O_RDWR)) {
    return 1;
  }
  /* 2. check for enough space; */
  size = sizeof(double) * datum->nvectors * datum->dim;
  if(!audiodb_enough_data_space_free(adb, size)) {
    return 1;
  }
  if(!audiodb_enough_per_file_space_free(adb)) {
    return 1;
  }
  /* 3. check that datum->dim and adb->header->dim agree (or that the
   *    header dimension is zero, in which case write datum->dim to
   *    adb->header->dim).
   */
  if(adb->header->dim == 0) {
    adb->header->dim = datum->dim;
  } else if (adb->header->dim != datum->dim) {
    return 1;
  }
  /* 4. check for presence of datum->key in adb->keys; */
  if(adb->keys->count(datum->key)) {
    /* not part of an explicit API/ABI, but we need a distinguished
       value in this circumstance to preserve somewhat wonky behaviour
       of audioDB::batchinsert. */
    return 2;
  }
  /* 5. check for consistency between power and O2_FLAG_POWER, and
   *    times and O2_FLAG_TIMES; 
   */
  if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
     ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) {
    return 1;
  }
  if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) {
    if(adb->header->numFiles == 0) {
      adb->header->flags |= O2_FLAG_TIMES;
    } else {
      return 1;
    }
  } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
    return 1;
  }
  /* 6. write in data, power, times as appropriate; add to track
   *    and key tables too;
   */
  offset = adb->header->length;
  nfiles = adb->header->numFiles;

  /* FIXME: checking for all these lseek()s */
  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
  write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
  lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
  write_or_goto_error(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    char cwd[PATH_MAX];
    char slash = '/';

    if(!getcwd(cwd, PATH_MAX)) {
      goto error;
    }
    lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
    if(*((char *) datum->data) != '/') {
      write_or_goto_error(adb->fd, cwd, strlen(cwd));
      write_or_goto_error(adb->fd, &slash, 1);
    }
    write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->power) != '/') {
        write_or_goto_error(adb->fd, cwd, strlen(cwd));
        write_or_goto_error(adb->fd, &slash, 1);
      }
      write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
      if(*((char *) datum->times) != '/') {
        write_or_goto_error(adb->fd, cwd, strlen(cwd));
        write_or_goto_error(adb->fd, &slash, 1);
      }
      write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
    }
  } else {
    lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
    write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
    if(datum->power) {
      lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
      write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
    }
    if(datum->times) {
      lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
      write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
    }
  }

  /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
   *    in table;
   */
  if((adb->header->flags & O2_FLAG_L2NORM) &&
     !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
    l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
    
    /* FIXME: shared code with audiodb_norm_existing() */
    dp = (double *) datum->data;
    lp = l2norm_buffer;
    for(size_t i = 0; i < datum->nvectors; i++) {
      *lp = 0;
      for(unsigned int k = 0; k < datum->dim; k++) {
        *lp += (*dp)*(*dp);
        dp++;
      }
      lp++;
    }
    lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
    write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
    free(l2norm_buffer);
    l2norm_buffer = NULL;
  }

  /* 8. update adb->keys and adb->header; */
  adb->keys->insert(datum->key);
  adb->header->numFiles += 1;
  adb->header->length += sizeof(double) * datum->nvectors * datum->dim;

  /* 9. sync adb->header with disk. */
  return audiodb_sync_header(adb);

 error:
  if(l2norm_buffer) {
    free(l2norm_buffer);
  }
  return 1;
}

int audiodb_insert_datum(adb_t *adb, adb_datum_t *datum) {
  adb_datum_internal_t d;
  d.nvectors = datum->nvectors;
  d.dim = datum->dim;
  d.key = datum->key;
  d.data = datum->data;
  d.times = datum->times;
  d.power = datum->power;
  return audiodb_insert_datum_internal(adb, &d);
}

static int audiodb_free_datum(adb_datum_t *datum) {
  if(datum->data) {
    free(datum->data);
  }
  if(datum->power) {
    free(datum->power);
  }
  if(datum->times) {
    free(datum->times);
  }
  return 0;
}

static int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) {
  int fd = 0;
  FILE *file = NULL;
  struct stat st;
  off_t size;

  datum->data = NULL;
  datum->power = NULL;
  datum->times = NULL;
  if((fd = open(insert->features, O_RDONLY)) == -1) {
    goto error;
  }
  if(fstat(fd, &st)) {
    goto error;
  }
  read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
  size = st.st_size - sizeof(uint32_t);
  datum->nvectors = size / (sizeof(double) * datum->dim);
  datum->data = (double *) malloc(size);
  if(!datum->data) {
    goto error;
  }
  read_or_goto_error(fd, datum->data, size);
  close(fd);
  fd = 0;
  if(insert->power) {
    int dim;
    if((fd = open(insert->power, O_RDONLY)) == -1) {
      goto error;
    }
    if(fstat(fd, &st)) {
      goto error;
    }
    if((st.st_size - sizeof(uint32_t)) != (size / datum->dim)) {
      goto error;
    }
    read_or_goto_error(fd, &dim, sizeof(uint32_t));
    if(dim != 1) {
      goto error;
    }
    datum->power = (double *) malloc(size / datum->dim);
    if(!datum->power) {
      goto error;
    }
    read_or_goto_error(fd, datum->power, size / datum->dim);
    close(fd);
  }
  if(insert->times) {
    double t, *tp;
    if(!(file = fopen(insert->times, "r"))) {
      goto error;
    }
    datum->times = (double *) malloc(2 * size / datum->dim);
    if(!datum->times) {
      goto error;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    tp = datum->times;
    *tp++ = t;
    for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
      if(fscanf(file, " %lf", &t) != 1) {
        goto error;
      }
      *tp++ = t;
      *tp++ = t;
    }
    if(fscanf(file, " %lf", &t) != 1) {
      goto error;
    }
    *tp = t;
    fclose(file);
  }
  datum->key = insert->key ? insert->key : insert->features;
  return 0;

 error:
  if(fd > 0) {
    close(fd);
  }
  if(file) {
    fclose(file);
  }
  audiodb_free_datum(datum);
  return 1;
}

int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
    adb_datum_internal_t d;
    struct stat st;
    int fd;
    int err;
    off_t size;
    
    if((fd = open(insert->features, O_RDONLY)) == -1) {
      return 1;
    }
    if(fstat(fd, &st)) {
      goto error;
    }
    read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
    close(fd);
    fd = 0;
    size = st.st_size - sizeof(uint32_t);
    d.nvectors = size / (sizeof(double) * d.dim);
    d.data = (void *) insert->features;
    if(insert->power) {
      if(stat(insert->power, &st)) {
        return 1;
      }
    }
    d.power = (void *) insert->power;
    if(insert->times) {
      if(stat(insert->times, &st)) {
        return 1;
      }
    }
    d.times = (void *) insert->times;
    d.key = insert->key ? insert->key : insert->features;
    err = audiodb_insert_datum_internal(adb, &d);

    if(err == 2) {
      return 0;
    } else {
      return err;
    }
  error:
    if(fd) {
      close(fd);
    }
    return 1;
  } else {
    adb_datum_t datum;
    int err;

    if(audiodb_insert_create_datum(insert, &datum)) {
      return 1;
    }
    err = audiodb_insert_datum(adb, &datum);
    audiodb_free_datum(&datum);

    if(err == 2) {
      return 0;
    } else {
      return err;
    }
  }
}

int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
  int err;
  for(unsigned int n = 0; n < size; n++) {
    if((err = audiodb_insert(adb, &(insert[n])))) {
      return err;
    }
  }
  return 0;
}