annotate index-utils.cpp @ 509:cc2b97d020b1

Code rearrangements to tease apart library code from C++ audioDB code. There should be precisely no functional changes in this commit. Instead, the only thing that has happened is that all the abstraction violation and other horribleness is concentrated in one place: the include of "audioDB-internals.h" in audioDB.h -- the separation will be complete once that include can be removed. This include is necessary because the command-line binary / SOAP server still does some things directly rather than through an API: not least the operations that have not yet been integrated into the API, but also some messing around with constants, flags and nominally internal functions. The intent is to remove as many of these as possible and think quite hard about the rest. In the meantime, the library is now much more self-contained: the only things it uses are in the audioDB_API.h and audioDB-internals.h headers; thus there are fewer nasty surprises lurking for readers of the code. The Makefile has been adjusted to take advantage of this rearrangement in the dependencies.
author mas01cr
date Thu, 15 Jan 2009 13:57:33 +0000
parents
children 9119f2fa3efe
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@509 5
mas01cr@509 6 /*
mas01cr@509 7 * Routines which are common to both indexed query and index creation:
mas01cr@509 8 * we put them in their own file for build logistics.
mas01cr@509 9 */
mas01cr@509 10
mas01cr@509 11 /* FIXME: there are several things wrong with this: the memory
mas01cr@509 12 * discipline isn't ideal, the radius printing is a bit lame, the name
mas01cr@509 13 * getting will succeed or fail depending on whether the path was
mas01cr@509 14 * relative or absolute -- but most importantly encoding all that
mas01cr@509 15 * information in a filename is going to lose: it's impossible to
mas01cr@509 16 * maintain backwards-compatibility. Instead we should probably store
mas01cr@509 17 * the index metadata inside the audiodb instance. */
mas01cr@509 18 char *audiodb_index_get_name(const char *dbName, double radius, Uns32T sequenceLength) {
mas01cr@509 19 char *indexName;
mas01cr@509 20 if(strlen(dbName) > (ADB_MAXSTR - 32)) {
mas01cr@509 21 return NULL;
mas01cr@509 22 }
mas01cr@509 23 indexName = new char[ADB_MAXSTR];
mas01cr@509 24 strncpy(indexName, dbName, ADB_MAXSTR);
mas01cr@509 25 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
mas01cr@509 26 return indexName;
mas01cr@509 27 }
mas01cr@509 28
mas01cr@509 29 bool audiodb_index_exists(const char *dbName, double radius, Uns32T sequenceLength) {
mas01cr@509 30 char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
mas01cr@509 31 if(!indexName) {
mas01cr@509 32 return false;
mas01cr@509 33 }
mas01cr@509 34 struct stat st;
mas01cr@509 35 if(stat(indexName, &st)) {
mas01cr@509 36 delete [] indexName;
mas01cr@509 37 return false;
mas01cr@509 38 }
mas01cr@509 39 /* FIXME: other stat checks here? */
mas01cr@509 40 /* FIXME: is there any better way to check whether we can open a
mas01cr@509 41 * file for reading than by opening a file for reading? */
mas01cr@509 42 int fd = open(indexName, O_RDONLY);
mas01cr@509 43 delete [] indexName;
mas01cr@509 44 if(fd < 0) {
mas01cr@509 45 return false;
mas01cr@509 46 } else {
mas01cr@509 47 close(fd);
mas01cr@509 48 return true;
mas01cr@509 49 }
mas01cr@509 50 }
mas01cr@509 51
mas01cr@509 52 /* FIXME: the indexName arg should be "const char *", but the LSH
mas01cr@509 53 * library doesn't like that.
mas01cr@509 54 */
mas01cr@509 55 LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
mas01cr@509 56 LSH *lsh;
mas01cr@509 57 if(adb->cached_lsh) {
mas01cr@509 58 if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
mas01cr@509 59 return adb->cached_lsh;
mas01cr@509 60 } else {
mas01cr@509 61 delete adb->cached_lsh;
mas01cr@509 62 }
mas01cr@509 63 }
mas01cr@509 64 lsh = new LSH(indexName, load_tables);
mas01cr@509 65 if(load_tables) {
mas01cr@509 66 adb->cached_lsh = lsh;
mas01cr@509 67 }
mas01cr@509 68 return lsh;
mas01cr@509 69 }
mas01cr@509 70
mas01cr@509 71 vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
mas01cr@509 72 std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz);
mas01cr@509 73 for(Uns32T i=0 ; i < sz ; i++) {
mas01cr@509 74 (*vv)[i]=vector<float>(dim * seqLen);
mas01cr@509 75 }
mas01cr@509 76 return vv;
mas01cr@509 77 }
mas01cr@509 78
/*
 * Destroy a shingle set and every shingle it contains.  vv must have
 * been allocated with new; passing NULL is harmless.
 */
void audiodb_index_delete_shingles(std::vector<std::vector<float> > *vv) {
  delete vv;
}
mas01cr@509 82
mas01cr@509 83 void audiodb_index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
mas01cr@509 84 assert(idx<(*vv).size());
mas01cr@509 85 vector<float>::iterator ve = (*vv)[idx].end();
mas01cr@509 86 vector<float>::iterator vi = (*vv)[idx].begin();
mas01cr@509 87 // First feature vector in shingle
mas01cr@509 88 if(idx == 0) {
mas01cr@509 89 while(vi!=ve) {
mas01cr@509 90 *vi++ = (float)(*fvp++);
mas01cr@509 91 }
mas01cr@509 92 } else {
mas01cr@509 93 // Not first feature vector in shingle
mas01cr@509 94 vector<float>::iterator ui=(*vv)[idx-1].begin() + dim;
mas01cr@509 95 // Previous seqLen-1 dim-vectors
mas01cr@509 96 while(vi!=ve-dim) {
mas01cr@509 97 *vi++ = *ui++;
mas01cr@509 98 }
mas01cr@509 99 // Move data pointer to next feature vector
mas01cr@509 100 fvp += ( seqLen + idx - 1 ) * dim ;
mas01cr@509 101 // New d-vector
mas01cr@509 102 while(vi!=ve) {
mas01cr@509 103 *vi++ = (float)(*fvp++);
mas01cr@509 104 }
mas01cr@509 105 }
mas01cr@509 106 }
mas01cr@509 107
mas01cr@509 108 // in-place norming, no deletions. If using power, return number of
mas01cr@509 109 // shingles above power threshold.
mas01cr@509 110 int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
mas01cr@509 111 int z = 0; // number of above-threshold shingles
mas01cr@509 112 float l2norm;
mas01cr@509 113 double power;
mas01cr@509 114 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
mas01cr@509 115 float oneOverSqrtl2NormDivRad = oneOverRadius;
mas01cr@509 116 Uns32T shingleSize = seqLen * dim;
mas01cr@509 117
mas01cr@509 118 if(!spp) {
mas01cr@509 119 return -1;
mas01cr@509 120 }
mas01cr@509 121 for(Uns32T a=0; a<(*vv).size(); a++){
mas01cr@509 122 l2norm = (float)(*snp++);
mas01cr@509 123 if(normed_vectors)
mas01cr@509 124 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
mas01cr@509 125
mas01cr@509 126 for(Uns32T b=0; b < shingleSize ; b++)
mas01cr@509 127 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
mas01cr@509 128
mas01cr@509 129 power = *spp++;
mas01cr@509 130 if(use_pthreshold){
mas01cr@509 131 if (power >= pthreshold)
mas01cr@509 132 z++;
mas01cr@509 133 }
mas01cr@509 134 else
mas01cr@509 135 z++;
mas01cr@509 136 }
mas01cr@509 137 return z;
mas01cr@509 138 }