view l2norm.cpp @ 405:ef4792df8f93 api-inversion

invert audioDB::insert / audiodb_insert(). Start off by removing audioDB::insertDatum, and essentially reusing it as audiodb_insert. We now ignore the fact that the command-line parsing code has "helpfully" opened a std::ifstream for the times file and an fd for the power file, and simply go ahead and do our own dirty work. We can delete audioDB::insertDatum entirely, but unfortunately we can't delete audioDB::insertPowerData and audioDB::insertTimestamps, because the index and query code respectively use them. Instead, move the two methods closer to their single uses. audiodb_insert() is perhaps not as short and simple as it might have been hoped given the existence of audiodb_insert_datum(); some of that is C and its terrible way of making you pay every time you use dynamic memory; some of it is the fact that the three different files (feature, times, power) each requires slightly different treatment. Hey ho. We can implement audiodb_batchinsert() in terms of audiodb_insert(); the function is pleasingly small. We can't quite use it for audioDB::batchinsert yet, as we have to deal with the O2_FLAG_LARGE_ADB case (which codepath is untested in libtests/). This means that we can delete whole swathes of hideous code from audioDB.cpp, including not just the versions of audiodb_insert() and audiodb_batchinsert() but also an entire audioDB constructor. Yay. (audioDB::unitNormAndInsertL2 has also died a deserved death).
author mas01cr
date Fri, 05 Dec 2008 22:32:49 +0000
parents 7038f31124d1
children d7e590d58c85
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
#include "audioDB-internals.h"
}

/* Fill in the L2-norm table for the vectors already stored in the
 * database.
 *
 * The stored data (header->length bytes starting at
 * header->dataOffset) is treated as consecutive vectors of
 * header->dim doubles.  For each vector the sum of the squares of its
 * components is computed (no square root is taken here), and the
 * resulting array of doubles is written to adb->fd at
 * header->l2normTableOffset.
 *
 * Returns 0 on success and 1 on any failure (zero dimension, mapping,
 * allocation, seek, or short/failed write). */
static int audiodb_l2norm_existing(adb_t *adb) {
  /* Both buffers start out null: the cleanup code at the error label
   * looks at them, and the very first failure point (the mapping) is
   * reached before either has been assigned. */
  double *data_buffer = NULL, *l2norm_buffer = NULL;
  double *dp, *lp;
  adb_header_t *header = adb->header;
  size_t data_buffer_size = ALIGN_PAGE_UP(header->length);
  size_t nvectors;
  size_t l2norm_size;

  /* A zero dimension would make the vector count below a division by
   * zero; report it as an ordinary failure instead. */
  if(header->dim == 0) {
    return 1;
  }
  nvectors = header->length / (sizeof(double) * header->dim);
  l2norm_size = nvectors * sizeof(double);

  /* FIXME: this map of the vector data will lose if we ever turn the
   * l2norm flag on when we have already inserted a large number of
   * vectors, as the mmap() will fail.  "Don't do that, then" is one
   * possible answer. */
  /* NOTE(review): mmap_or_goto_error is defined elsewhere; going by
   * its name it jumps to the error label below when the mapping
   * fails. */
  mmap_or_goto_error(double *, data_buffer, header->dataOffset, data_buffer_size);
  l2norm_buffer = (double *) malloc(l2norm_size);
  if(!l2norm_buffer) {
    goto error;
  }

  /* dp walks every stored component in order; lp advances once per
   * vector and accumulates that vector's sum of squares. */
  dp = data_buffer;
  lp = l2norm_buffer;
  for(size_t i = 0; i < nvectors; i++) {
    *lp = 0;
    for(unsigned int k = 0; k < header->dim; k++) {
      *lp += (*dp)*(*dp);
      dp++;
    }
    lp++;
  }

  if(lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET) == (off_t) -1) {
    goto error;
  }
  /* A short write is treated as a failure, not retried. */
  if(write(adb->fd, l2norm_buffer, l2norm_size) != (ssize_t) l2norm_size) {
    goto error;
  }

  munmap(data_buffer, data_buffer_size);
  free(l2norm_buffer);

  return 0;

 error:
  maybe_munmap(data_buffer, data_buffer_size);
  /* free() accepts a null pointer, so no guard is needed. */
  free(l2norm_buffer);
  return 1;
}

/* Switch on the O2_FLAG_L2NORM flag in the database header.
 *
 * Returns 1 if adb->flags does not have the O_RDWR bit set, or if
 * computing norms for already-stored data fails.  Returns 0 if the
 * flag is already set.  Otherwise sets the flag and returns whatever
 * audiodb_sync_header() returns. */
int audiodb_l2norm(adb_t *adb) {
  adb_header_t *hdr = adb->header;

  if((adb->flags & O_RDWR) == 0) {
    return 1;
  }

  if((hdr->flags & O2_FLAG_L2NORM) != 0) {
    /* already on: deliberately not an error, for forthcoming
     * backwards-compatibility reasons */
    return 0;
  }

  /* Norms for existing data are only computed here when the database
   * is not flagged O2_FLAG_LARGE_ADB and actually holds some data. */
  int is_small_adb = (hdr->flags & O2_FLAG_LARGE_ADB) == 0;
  if(is_small_adb && hdr->length > 0 && audiodb_l2norm_existing(adb) != 0) {
    return 1;
  }

  hdr->flags |= O2_FLAG_L2NORM;
  return audiodb_sync_header(adb);
}