annotate index-utils.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents 9119f2fa3efe
children
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@589 5 #include "lshlib.h"
mas01cr@509 6
mas01cr@509 7 /*
mas01cr@509 8 * Routines which are common to both indexed query and index creation:
mas01cr@509 9 * we put them in their own file for build logistics.
mas01cr@509 10 */
mas01cr@509 11
mas01cr@509 12 /* FIXME: there are several things wrong with this: the memory
mas01cr@509 13 * discipline isn't ideal, the radius printing is a bit lame, the name
mas01cr@509 14 * getting will succeed or fail depending on whether the path was
mas01cr@509 15 * relative or absolute -- but most importantly encoding all that
mas01cr@509 16 * information in a filename is going to lose: it's impossible to
mas01cr@509 17 * maintain backwards-compatibility. Instead we should probably store
mas01cr@509 18 * the index metadata inside the audiodb instance. */
mas01cr@589 19 char *audiodb_index_get_name(const char *dbName, double radius, uint32_t sequenceLength) {
mas01cr@509 20 char *indexName;
mas01cr@509 21 if(strlen(dbName) > (ADB_MAXSTR - 32)) {
mas01cr@509 22 return NULL;
mas01cr@509 23 }
mas01cr@509 24 indexName = new char[ADB_MAXSTR];
mas01cr@509 25 strncpy(indexName, dbName, ADB_MAXSTR);
mas01cr@509 26 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
mas01cr@509 27 return indexName;
mas01cr@509 28 }
mas01cr@509 29
mas01cr@589 30 bool audiodb_index_exists(const char *dbName, double radius, uint32_t sequenceLength) {
mas01cr@509 31 char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
mas01cr@509 32 if(!indexName) {
mas01cr@509 33 return false;
mas01cr@509 34 }
mas01cr@509 35 struct stat st;
mas01cr@509 36 if(stat(indexName, &st)) {
mas01cr@509 37 delete [] indexName;
mas01cr@509 38 return false;
mas01cr@509 39 }
mas01cr@509 40 /* FIXME: other stat checks here? */
mas01cr@509 41 /* FIXME: is there any better way to check whether we can open a
mas01cr@509 42 * file for reading than by opening a file for reading? */
mas01cr@509 43 int fd = open(indexName, O_RDONLY);
mas01cr@509 44 delete [] indexName;
mas01cr@509 45 if(fd < 0) {
mas01cr@509 46 return false;
mas01cr@509 47 } else {
mas01cr@509 48 close(fd);
mas01cr@509 49 return true;
mas01cr@509 50 }
mas01cr@509 51 }
mas01cr@509 52
mas01cr@509 53 /* FIXME: the indexName arg should be "const char *", but the LSH
mas01cr@509 54 * library doesn't like that.
mas01cr@509 55 */
mas01cr@509 56 LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
mas01cr@509 57 LSH *lsh;
mas01cr@509 58 if(adb->cached_lsh) {
mas01cr@509 59 if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
mas01cr@509 60 return adb->cached_lsh;
mas01cr@509 61 } else {
mas01cr@509 62 delete adb->cached_lsh;
mas01cr@509 63 }
mas01cr@509 64 }
mas01cr@509 65 lsh = new LSH(indexName, load_tables);
mas01cr@509 66 if(load_tables) {
mas01cr@509 67 adb->cached_lsh = lsh;
mas01cr@509 68 }
mas01cr@509 69 return lsh;
mas01cr@509 70 }
mas01cr@509 71
/*
 * Allocate an array of sz shingles, each a zero-filled vector of
 * dim * seqLen floats.
 *
 * The result is allocated with new; release it with
 * audiodb_index_delete_shingles().
 */
std::vector<std::vector<float> > *audiodb_index_initialize_shingles(uint32_t sz, uint32_t dim, uint32_t seqLen) {
  /* Multiply in size_t so the product does not wrap at 32 bits where
   * size_t is wider. */
  const size_t shingleLength = static_cast<size_t>(dim) * seqLen;
  /* Build the prototype first and fill-construct the outer vector in
   * one step, so an allocation failure cannot leak a half-built array. */
  const std::vector<float> blankShingle(shingleLength);
  return new std::vector<std::vector<float> >(sz, blankShingle);
}
mas01cr@509 79
/*
 * Destroy a shingle array, freeing the outer vector and every shingle
 * it holds.  The pointer must not be used afterwards.
 */
void audiodb_index_delete_shingles(std::vector<std::vector<float> > *vv) {
  delete vv;
}
mas01cr@509 83
/*
 * Fill shingle idx of *vv from the feature data at fvp.
 *
 * Shingle 0 is copied directly from the start of fvp, converting each
 * double to float.  Any later shingle is built from its predecessor:
 * everything after the predecessor's first dim values is carried over,
 * and the final dim values are read from fvp at element offset
 * (seqLen + idx - 1) * dim.  Shingle idx - 1 must therefore already
 * have been filled.
 *
 * The length of each shingle is taken from the vector itself, not
 * recomputed from dim and seqLen.
 */
void audiodb_index_make_shingle(std::vector<std::vector<float> >* vv, uint32_t idx, double* fvp, uint32_t dim, uint32_t seqLen){
  std::vector<float> &shingle = (*vv)[idx];
  const size_t shingleLength = shingle.size();

  // First feature vector in shingle
  if(idx == 0) {
    for(size_t k = 0; k < shingleLength; k++) {
      shingle[k] = (float)fvp[k];
    }
    return;
  }

  // Not first feature vector in shingle: reuse the previous one,
  // shifted along by one dim-vector.  Clamp so that a shingle shorter
  // than dim cannot make the carried length wrap around.
  const std::vector<float> &previous = (*vv)[idx - 1];
  const size_t carried = shingleLength > dim ? shingleLength - dim : 0;
  for(size_t k = 0; k < carried; k++) {
    shingle[k] = previous[k + dim];
  }

  // Move data pointer to next feature vector; the offset is computed
  // in size_t so it does not wrap at 32 bits for large feature sets.
  const double *newest = fvp + (static_cast<size_t>(seqLen) + idx - 1) * dim;

  // New d-vector
  for(size_t k = carried; k < shingleLength; k++) {
    shingle[k] = (float)(*newest++);
  }
}
mas01cr@509 108
/*
 * In-place norming of every shingle in *vv; no shingles are deleted.
 *
 * The first seqLen * dim values of each shingle are multiplied by
 * 1/sqrt(radius) (the radius passed in is really radius^2) and, when
 * normed_vectors is set, additionally by 1/snp[a] for shingle a.
 *
 * Returns the number of shingles counted: with use_pthreshold, those
 * whose spp[a] is >= pthreshold; otherwise all of them.  Returns -1,
 * without touching *vv, if spp is NULL or if normed_vectors is set and
 * snp is NULL.  snp is only read when normed_vectors is set.
 */
int audiodb_index_norm_shingles(std::vector<std::vector<float> >* vv, double* snp, double* spp, uint32_t dim, uint32_t seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
  if(!spp || (normed_vectors && !snp)) {
    return -1;
  }

  const float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
  const uint32_t shingleSize = seqLen * dim;
  int z = 0; // number of above-threshold shingles

  for(size_t a = 0; a < vv->size(); a++) {
    float scale = oneOverRadius;
    if(normed_vectors) {
      const float l2norm = (float)snp[a];
      scale = (1./l2norm)*oneOverRadius;
    }

    std::vector<float> &shingle = (*vv)[a];
    for(uint32_t b = 0; b < shingleSize; b++) {
      shingle[b] *= scale;
    }

    if(!use_pthreshold || spp[a] >= pthreshold) {
      z++;
    }
  }
  return z;
}
mas01cr@509 139 }