view index-utils.cpp @ 584:e3790284fd4a

Merged through librdf storage hooks and apache2 module. Squashed commit of the following: commit a6cfca8f04036e12e7d7fcd55c47224e802582f0 Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:23:32 2009 +0100 Removed leftover bits and bobs. commit f1f0dd074d0767de3e24ba636779fd8701d73d9e Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:07:20 2009 +0100 Simple test of database creation via librdf. commit 90e6350538e004d8785137e5ff2ac878c22a5d42 Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:05:10 2009 +0100 Added the apache2 module which will hook into the librdf storage module commit c75bf53763b7078c83ae97fcf247da2576baa79a Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:04:53 2009 +0100 Added sparql librdf source - requires the librdf sources to compile. commit 0646f0190112a73ddb2533537e2cc9832c066b52 Author: Michael Jewell <mjewell@harrison.(none)> Date: Mon Jul 27 12:12:26 2009 +0100 Adding execution to mod_audiodb commit 8f83f27ba4d917278bca0c7cb665d930e28c86df Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:15:57 2009 +0100 Some initial returns for the sparql handler. commit dc639aed11943a5b0c379eb47cf293f76908b1b7 Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:06:20 2009 +0100 Added a little setup.sh script to do libtoolize/autoconf etc. commit 3a679da499db647fc82cf2797daeb5cc44ed7655 Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:03:42 2009 +0100 Adding initial bits for apache mod
author mas01mj
date Fri, 31 Jul 2009 14:36:12 +0000
parents cc2b97d020b1
children 9119f2fa3efe
line wrap: on
line source
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"

/*
 * Routines which are common to both indexed query and index creation:
 * we put them in their own file for build logistics.
 */

/* FIXME: there are several things wrong with this: the memory
 * discipline isn't ideal, the radius printing is a bit lame, the name
 * getting will succeed or fail depending on whether the path was
 * relative or absolute -- but most importantly encoding all that
 * information in a filename is going to lose: it's impossible to
 * maintain backwards-compatibility.  Instead we should probably store
 * the index metadata inside the audiodb instance. */
/* Build the index file name for a database, radius and sequence length:
 * "<dbName>.lsh.<radius>.<sequenceLength>".
 *
 * Returns a buffer of ADB_MAXSTR chars allocated with new[] (release it
 * with delete []), or NULL when dbName is longer than ADB_MAXSTR - 32 or
 * when the formatted suffix does not fit in the remaining space.
 */
char *audiodb_index_get_name(const char *dbName, double radius, Uns32T sequenceLength) {
  const size_t nameLength = strlen(dbName);
  if(nameLength > (ADB_MAXSTR - 32)) {
    return NULL;
  }
  char *indexName = new char[ADB_MAXSTR];
  /* strncpy zero-fills the rest of the buffer after the copied name. */
  strncpy(indexName, dbName, ADB_MAXSTR);
  /* "%019.9f" only sets a minimum width, so the suffix can need more than
   * the 32 bytes reserved by the check above (a large radius, or a long
   * sequence length).  Bound the write and report truncation instead of
   * writing past the end of the buffer. */
  const size_t room = ADB_MAXSTR - nameLength;
  /* The explicit cast keeps "%d" well-defined for an unsigned argument
   * without changing the text of the generated names. */
  const int written = snprintf(indexName + nameLength, room, ".lsh.%019.9f.%d",
                               radius, static_cast<int>(sequenceLength));
  if(written < 0 || static_cast<size_t>(written) >= room) {
    delete [] indexName;
    return NULL;
  }
  return indexName;
}

/* Report whether the index file for this database, radius and sequence
 * length is present and can be opened for reading.
 *
 * Returns false when the index name cannot be built, when stat() fails
 * on it, or when it cannot be opened read-only; true otherwise.
 */
bool audiodb_index_exists(const char *dbName, double radius, Uns32T sequenceLength) {
  char *path = audiodb_index_get_name(dbName, radius, sequenceLength);
  if(!path) {
    return false;
  }

  bool readable = false;
  struct stat info;
  /* FIXME: other stat checks here? */
  if(stat(path, &info) == 0) {
    /* FIXME: is there any better way to check whether we can open a
     * file for reading than by opening a file for reading? */
    const int handle = open(path, O_RDONLY);
    if(handle >= 0) {
      close(handle);
      readable = true;
    }
  }

  /* Single cleanup point for the name buffer. */
  delete [] path;
  return readable;
}

/* Return an LSH object for indexName, reusing the one cached on adb when
 * its index name matches (compared over at most ADB_MAXSTR characters).
 *
 * A cached LSH with a different name is deleted.  A newly created LSH is
 * stored in adb->cached_lsh only when load_tables is true; otherwise it
 * is simply returned and the cache slot is left empty.
 * NOTE(review): presumably the caller is responsible for deleting a
 * returned LSH that is not adb->cached_lsh -- confirm against callers.
 *
 * FIXME: the indexName arg should be "const char *", but the LSH
 * library doesn't like that.
 */
LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
  if(adb->cached_lsh) {
    if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
      return adb->cached_lsh;
    }
    /* Stale entry: delete it and clear the slot straight away, so adb
     * never keeps a dangling pointer when the replacement is not cached
     * (load_tables false) or when constructing it throws. */
    delete adb->cached_lsh;
    adb->cached_lsh = NULL;
  }
  LSH *lsh = new LSH(indexName, load_tables);
  if(load_tables) {
    adb->cached_lsh = lsh;
  }
  return lsh;
}

/* Allocate sz shingles, each holding dim * seqLen floats initialised to
 * zero.  The result is created with new; release it with
 * audiodb_index_delete_shingles().
 */
vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
  /* Build everything in one constructor call: if an allocation fails
   * part-way through, the new-expression releases the outer vector
   * instead of leaking it, and no slot is default-constructed first
   * only to be overwritten. */
  return new vector<vector<float> >(sz, vector<float>(dim * seqLen));
}

/* Destroy a shingle collection that was allocated with new (as
 * audiodb_index_initialize_shingles() does).  Passing a null pointer is
 * harmless, since delete on a null pointer does nothing.
 */
void audiodb_index_delete_shingles(vector<vector<float> > *vv) {
  delete vv;
}

/* Fill shingle number idx of vv from the feature data at fvp.
 *
 * For idx == 0 the whole shingle is copied from the start of fvp.  For
 * any later idx the shingle is the previous one shifted down by dim
 * values, followed by dim new values read from
 * fvp[(seqLen + idx - 1) * dim] onwards; shingle idx - 1 must therefore
 * have been built already.  Values are narrowed from double to float.
 */
void audiodb_index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
  assert(idx < vv->size());
  vector<float> &shingle = (*vv)[idx];
  const vector<float>::size_type total = shingle.size();

  if(idx == 0) {
    // First shingle: take every value straight from the feature data
    for(vector<float>::size_type k = 0; k < total; k++) {
      shingle[k] = (float) fvp[k];
    }
    return;
  }

  // Later shingles reuse the tail of the previous one ...
  const vector<float> &previous = (*vv)[idx - 1];
  const vector<float>::size_type carried = total - dim;
  for(vector<float>::size_type k = 0; k < carried; k++) {
    shingle[k] = previous[k + dim];
  }

  // ... and append the single feature vector that is new to this shingle
  const double *fresh = fvp + ( seqLen + idx - 1 ) * dim;
  for(vector<float>::size_type k = carried; k < total; k++) {
    shingle[k] = (float) fresh[k - carried];
  }
}

/* Scale every shingle in vv in place; no shingle is removed.
 *
 * The first dim * seqLen values of each shingle are multiplied by
 * 1/sqrt(radius), and additionally by 1/snp[i] when normed_vectors is
 * set.  snp is read once per shingle in either case.
 *
 * Returns -1 when spp is null.  Otherwise returns the number of shingles
 * counted: all of them when use_pthreshold is false, or only those whose
 * spp entry is >= pthreshold when it is true.
 */
int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
  // Passed radius is really radius^2, hence the square root
  const float inverseRadius = 1./(float)sqrt(radius);
  const Uns32T valuesPerShingle = seqLen * dim;

  if(!spp) {
    return -1;
  }

  int accepted = 0; // number of above-threshold shingles
  float scale = inverseRadius;
  for(Uns32T s = 0; s < vv->size(); s++) {
    const float norm = (float)snp[s];
    if(normed_vectors) {
      scale = (1./norm)*inverseRadius;
    }

    vector<float> &shingle = (*vv)[s];
    for(Uns32T k = 0; k < valuesPerShingle; k++) {
      shingle[k] *= scale;
    }

    const double power = spp[s];
    if(!use_pthreshold || power >= pthreshold) {
      accepted++;
    }
  }
  return accepted;
}