annotate index-utils.cpp @ 601:82d23418d867

Fix some fd leaks in the command-line binary. Strictly speaking, they're not really leaks, because the only codepath that suffers from these leaks exits immediately afterwards. On the other hand, this fix makes valgrind on e.g. tests/0025 happier, going from 5 errors to none.
author mas01cr
date Fri, 14 Aug 2009 16:39:32 +0000
parents 9119f2fa3efe
children
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@509 4 #include "audioDB-internals.h"
mas01cr@589 5 #include "lshlib.h"
mas01cr@509 6
mas01cr@509 7 /*
mas01cr@509 8 * Routines which are common to both indexed query and index creation:
mas01cr@509 9 * we put them in their own file for build logistics.
mas01cr@509 10 */
mas01cr@509 11
mas01cr@509 12 /* FIXME: there are several things wrong with this: the memory
mas01cr@509 13 * discipline isn't ideal, the radius printing is a bit lame, the name
mas01cr@509 14 * getting will succeed or fail depending on whether the path was
mas01cr@509 15 * relative or absolute -- but most importantly encoding all that
mas01cr@509 16 * information in a filename is going to lose: it's impossible to
mas01cr@509 17 * maintain backwards-compatibility. Instead we should probably store
mas01cr@509 18 * the index metadata inside the audiodb instance. */
mas01cr@589 19 char *audiodb_index_get_name(const char *dbName, double radius, uint32_t sequenceLength) {
mas01cr@509 20 char *indexName;
mas01cr@509 21 if(strlen(dbName) > (ADB_MAXSTR - 32)) {
mas01cr@509 22 return NULL;
mas01cr@509 23 }
mas01cr@509 24 indexName = new char[ADB_MAXSTR];
mas01cr@509 25 strncpy(indexName, dbName, ADB_MAXSTR);
mas01cr@509 26 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
mas01cr@509 27 return indexName;
mas01cr@509 28 }
mas01cr@509 29
mas01cr@589 30 bool audiodb_index_exists(const char *dbName, double radius, uint32_t sequenceLength) {
mas01cr@509 31 char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
mas01cr@509 32 if(!indexName) {
mas01cr@509 33 return false;
mas01cr@509 34 }
mas01cr@509 35 struct stat st;
mas01cr@509 36 if(stat(indexName, &st)) {
mas01cr@509 37 delete [] indexName;
mas01cr@509 38 return false;
mas01cr@509 39 }
mas01cr@509 40 /* FIXME: other stat checks here? */
mas01cr@509 41 /* FIXME: is there any better way to check whether we can open a
mas01cr@509 42 * file for reading than by opening a file for reading? */
mas01cr@509 43 int fd = open(indexName, O_RDONLY);
mas01cr@509 44 delete [] indexName;
mas01cr@509 45 if(fd < 0) {
mas01cr@509 46 return false;
mas01cr@509 47 } else {
mas01cr@509 48 close(fd);
mas01cr@509 49 return true;
mas01cr@509 50 }
mas01cr@509 51 }
mas01cr@509 52
mas01cr@509 53 /* FIXME: the indexName arg should be "const char *", but the LSH
mas01cr@509 54 * library doesn't like that.
mas01cr@509 55 */
mas01cr@509 56 LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
mas01cr@509 57 LSH *lsh;
mas01cr@509 58 if(adb->cached_lsh) {
mas01cr@509 59 if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
mas01cr@509 60 return adb->cached_lsh;
mas01cr@509 61 } else {
mas01cr@509 62 delete adb->cached_lsh;
mas01cr@509 63 }
mas01cr@509 64 }
mas01cr@509 65 lsh = new LSH(indexName, load_tables);
mas01cr@509 66 if(load_tables) {
mas01cr@509 67 adb->cached_lsh = lsh;
mas01cr@509 68 }
mas01cr@509 69 return lsh;
mas01cr@509 70 }
mas01cr@509 71
/* Allocate storage for sz shingles, each holding dim * seqLen floats
 * initialised to zero.  The container is created with new; release it
 * with audiodb_index_delete_shingles(). */
vector<vector<float> > *audiodb_index_initialize_shingles(uint32_t sz, uint32_t dim, uint32_t seqLen) {
  // One zero-filled shingle, replicated sz times by the fill constructor.
  const vector<float> blank(dim * seqLen);
  return new vector<vector<float> >(sz, blank);
}
mas01cr@509 79
/* Destroy a shingle container that was allocated with new (as done by
 * audiodb_index_initialize_shingles()).  A null pointer is accepted
 * and ignored. */
void audiodb_index_delete_shingles(vector<vector<float> > *vv) {
  if(!vv) {
    return;
  }
  delete vv;
}
mas01cr@509 83
/* Fill shingle number idx of *vv from the feature data at fvp.
 *
 * The shingle's length is taken from (*vv)[idx].size().  Shingle 0 is
 * copied wholesale from the start of fvp.  Any later shingle reuses
 * shingle idx-1 shifted down by dim elements, then appends the dim
 * values found at fvp + (seqLen + idx - 1) * dim.  Shingles therefore
 * have to be built in increasing idx order.  Doubles are narrowed to
 * float on the way in. */
void audiodb_index_make_shingle(vector<vector<float> >* vv, uint32_t idx, double* fvp, uint32_t dim, uint32_t seqLen){
  vector<float> &shingle = (*vv)[idx];
  const size_t total = shingle.size();

  // First shingle: take everything straight from the feature data.
  if(idx == 0) {
    for(size_t k = 0; k < total; k++) {
      shingle[k] = (float) fvp[k];
    }
    return;
  }

  // Later shingles: all but the last dim values overlap the previous
  // shingle, one feature vector further along.
  const vector<float> &previous = (*vv)[idx - 1];
  const size_t carried = total - dim;
  for(size_t k = 0; k < carried; k++) {
    shingle[k] = previous[k + dim];
  }

  // The remaining dim values are the one feature vector not yet seen.
  const double *fresh = fvp + ( seqLen + idx - 1 ) * dim;
  for(size_t k = carried; k < total; k++) {
    shingle[k] = (float) *fresh++;
  }
}
mas01cr@509 108
/* In-place norming of every shingle in *vv; no shingles are deleted.
 *
 * Each of the first seqLen * dim elements of a shingle is multiplied by
 * 1/sqrt(radius), and additionally by 1/snp[i] when normed_vectors is
 * set (snp[i] being the value supplied for shingle i).
 *
 * Returns the number of shingles counted: with use_pthreshold, those
 * whose spp[i] is >= pthreshold; otherwise all of them.  Returns -1,
 * leaving *vv untouched, when snp or spp is NULL: both arrays are
 * indexed once per shingle. */
int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, uint32_t dim, uint32_t seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
  if(!snp || !spp) {
    return -1;
  }

  const float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
  const uint32_t shingleSize = seqLen * dim;
  int z = 0; // number of above-threshold shingles

  for(uint32_t a=0; a<(*vv).size(); a++){
    vector<float> &shingle = (*vv)[a];

    float scale = oneOverRadius;
    if(normed_vectors) {
      const float l2norm = (float) snp[a];
      scale = (1./l2norm)*oneOverRadius;
    }

    for(uint32_t b=0; b < shingleSize ; b++) {
      shingle[b] *= scale;
    }

    // Without a power threshold every shingle counts.
    if(!use_pthreshold || spp[a] >= pthreshold) {
      z++;
    }
  }
  return z;
}