mas01cr@509: extern "C" { mas01cr@509: #include "audioDB_API.h" mas01cr@509: } mas01cr@509: #include "audioDB-internals.h" mas01cr@589: #include "lshlib.h" mas01cr@509: mas01cr@509: /* mas01cr@509: * Routines which are common to both indexed query and index creation: mas01cr@509: * we put them in their own file for build logistics. mas01cr@509: */ mas01cr@509: mas01cr@509: /* FIXME: there are several things wrong with this: the memory mas01cr@509: * discipline isn't ideal, the radius printing is a bit lame, the name mas01cr@509: * getting will succeed or fail depending on whether the path was mas01cr@509: * relative or absolute -- but most importantly encoding all that mas01cr@509: * information in a filename is going to lose: it's impossible to mas01cr@509: * maintain backwards-compatibility. Instead we should probably store mas01cr@509: * the index metadata inside the audiodb instance. */ mas01cr@589: char *audiodb_index_get_name(const char *dbName, double radius, uint32_t sequenceLength) { mas01cr@509: char *indexName; mas01cr@509: if(strlen(dbName) > (ADB_MAXSTR - 32)) { mas01cr@509: return NULL; mas01cr@509: } mas01cr@509: indexName = new char[ADB_MAXSTR]; mas01cr@509: strncpy(indexName, dbName, ADB_MAXSTR); mas01cr@509: sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength); mas01cr@509: return indexName; mas01cr@509: } mas01cr@509: mas01cr@589: bool audiodb_index_exists(const char *dbName, double radius, uint32_t sequenceLength) { mas01cr@509: char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength); mas01cr@509: if(!indexName) { mas01cr@509: return false; mas01cr@509: } mas01cr@509: struct stat st; mas01cr@509: if(stat(indexName, &st)) { mas01cr@509: delete [] indexName; mas01cr@509: return false; mas01cr@509: } mas01cr@509: /* FIXME: other stat checks here? */ mas01cr@509: /* FIXME: is there any better way to check whether we can open a mas01cr@509: * file for reading than by opening a file for reading? */ mas01cr@509: int fd = open(indexName, O_RDONLY); mas01cr@509: delete [] indexName; mas01cr@509: if(fd < 0) { mas01cr@509: return false; mas01cr@509: } else { mas01cr@509: close(fd); mas01cr@509: return true; mas01cr@509: } mas01cr@509: } mas01cr@509: mas01cr@509: /* FIXME: the indexName arg should be "const char *", but the LSH mas01cr@509: * library doesn't like that. mas01cr@509: */ mas01cr@509: LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) { mas01cr@509: LSH *lsh; mas01cr@509: if(adb->cached_lsh) { mas01cr@509: if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) { mas01cr@509: return adb->cached_lsh; mas01cr@509: } else { mas01cr@509: delete adb->cached_lsh; mas01cr@509: } mas01cr@509: } mas01cr@509: lsh = new LSH(indexName, load_tables); mas01cr@509: if(load_tables) { mas01cr@509: adb->cached_lsh = lsh; mas01cr@509: } mas01cr@509: return lsh; mas01cr@509: } mas01cr@509: mas01cr@589: vector > *audiodb_index_initialize_shingles(uint32_t sz, uint32_t dim, uint32_t seqLen) { mas01cr@509: std::vector > *vv = new vector >(sz); mas01cr@589: for(uint32_t i=0 ; i < sz ; i++) { mas01cr@509: (*vv)[i]=vector(dim * seqLen); mas01cr@509: } mas01cr@509: return vv; mas01cr@509: } mas01cr@509: mas01cr@509: void audiodb_index_delete_shingles(vector > *vv) { mas01cr@509: delete vv; mas01cr@509: } mas01cr@509: mas01cr@589: void audiodb_index_make_shingle(vector >* vv, uint32_t idx, double* fvp, uint32_t dim, uint32_t seqLen){ mas01cr@589: mas01cr@509: vector::iterator ve = (*vv)[idx].end(); mas01cr@509: vector::iterator vi = (*vv)[idx].begin(); mas01cr@509: // First feature vector in shingle mas01cr@509: if(idx == 0) { mas01cr@509: while(vi!=ve) { mas01cr@509: *vi++ = (float)(*fvp++); mas01cr@509: } mas01cr@509: } else { mas01cr@509: // Not first feature vector in shingle mas01cr@509: vector::iterator ui=(*vv)[idx-1].begin() + dim; mas01cr@509: // Previous seqLen-1 dim-vectors mas01cr@509: while(vi!=ve-dim) { mas01cr@509: *vi++ = *ui++; mas01cr@509: } mas01cr@509: // Move data pointer to next feature vector mas01cr@509: fvp += ( seqLen + idx - 1 ) * dim ; mas01cr@509: // New d-vector mas01cr@509: while(vi!=ve) { mas01cr@509: *vi++ = (float)(*fvp++); mas01cr@509: } mas01cr@509: } mas01cr@509: } mas01cr@509: mas01cr@509: // in-place norming, no deletions. If using power, return number of mas01cr@509: // shingles above power threshold. mas01cr@589: int audiodb_index_norm_shingles(vector >* vv, double* snp, double* spp, uint32_t dim, uint32_t seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) { mas01cr@509: int z = 0; // number of above-threshold shingles mas01cr@509: float l2norm; mas01cr@509: double power; mas01cr@509: float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 mas01cr@509: float oneOverSqrtl2NormDivRad = oneOverRadius; mas01cr@589: uint32_t shingleSize = seqLen * dim; mas01cr@509: mas01cr@509: if(!spp) { mas01cr@509: return -1; mas01cr@509: } mas01cr@589: for(uint32_t a=0; a<(*vv).size(); a++){ mas01cr@509: l2norm = (float)(*snp++); mas01cr@509: if(normed_vectors) mas01cr@509: oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; mas01cr@509: mas01cr@589: for(uint32_t b=0; b < shingleSize ; b++) mas01cr@509: (*vv)[a][b]*=oneOverSqrtl2NormDivRad; mas01cr@509: mas01cr@509: power = *spp++; mas01cr@509: if(use_pthreshold){ mas01cr@509: if (power >= pthreshold) mas01cr@509: z++; mas01cr@509: } mas01cr@509: else mas01cr@509: z++; mas01cr@509: } mas01cr@509: return z; mas01cr@509: }