mas01cr@509
|
1 extern "C" {
|
mas01cr@509
|
2 #include "audioDB_API.h"
|
mas01cr@509
|
3 }
|
mas01cr@509
|
4 #include "audioDB-internals.h"
|
mas01cr@589
|
5 #include "lshlib.h"
|
mas01cr@509
|
6
|
mas01cr@509
|
7 /*
|
mas01cr@509
|
8 * Routines which are common to both indexed query and index creation:
|
mas01cr@509
|
9 * we put them in their own file for build logistics.
|
mas01cr@509
|
10 */
|
mas01cr@509
|
11
|
mas01cr@509
|
12 /* FIXME: there are several things wrong with this: the memory
|
mas01cr@509
|
13 * discipline isn't ideal, the radius printing is a bit lame, the name
|
mas01cr@509
|
14 * getting will succeed or fail depending on whether the path was
|
mas01cr@509
|
15 * relative or absolute -- but most importantly encoding all that
|
mas01cr@509
|
16 * information in a filename is going to lose: it's impossible to
|
mas01cr@509
|
17 * maintain backwards-compatibility. Instead we should probably store
|
mas01cr@509
|
18 * the index metadata inside the audiodb instance. */
|
mas01cr@589
|
19 char *audiodb_index_get_name(const char *dbName, double radius, uint32_t sequenceLength) {
|
mas01cr@509
|
20 char *indexName;
|
mas01cr@509
|
21 if(strlen(dbName) > (ADB_MAXSTR - 32)) {
|
mas01cr@509
|
22 return NULL;
|
mas01cr@509
|
23 }
|
mas01cr@509
|
24 indexName = new char[ADB_MAXSTR];
|
mas01cr@509
|
25 strncpy(indexName, dbName, ADB_MAXSTR);
|
mas01cr@509
|
26 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
|
mas01cr@509
|
27 return indexName;
|
mas01cr@509
|
28 }
|
mas01cr@509
|
29
|
mas01cr@589
|
30 bool audiodb_index_exists(const char *dbName, double radius, uint32_t sequenceLength) {
|
mas01cr@509
|
31 char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
|
mas01cr@509
|
32 if(!indexName) {
|
mas01cr@509
|
33 return false;
|
mas01cr@509
|
34 }
|
mas01cr@509
|
35 struct stat st;
|
mas01cr@509
|
36 if(stat(indexName, &st)) {
|
mas01cr@509
|
37 delete [] indexName;
|
mas01cr@509
|
38 return false;
|
mas01cr@509
|
39 }
|
mas01cr@509
|
40 /* FIXME: other stat checks here? */
|
mas01cr@509
|
41 /* FIXME: is there any better way to check whether we can open a
|
mas01cr@509
|
42 * file for reading than by opening a file for reading? */
|
mas01cr@509
|
43 int fd = open(indexName, O_RDONLY);
|
mas01cr@509
|
44 delete [] indexName;
|
mas01cr@509
|
45 if(fd < 0) {
|
mas01cr@509
|
46 return false;
|
mas01cr@509
|
47 } else {
|
mas01cr@509
|
48 close(fd);
|
mas01cr@509
|
49 return true;
|
mas01cr@509
|
50 }
|
mas01cr@509
|
51 }
|
mas01cr@509
|
52
|
mas01cr@509
|
53 /* FIXME: the indexName arg should be "const char *", but the LSH
|
mas01cr@509
|
54 * library doesn't like that.
|
mas01cr@509
|
55 */
|
mas01cr@509
|
56 LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
|
mas01cr@509
|
57 LSH *lsh;
|
mas01cr@509
|
58 if(adb->cached_lsh) {
|
mas01cr@509
|
59 if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
|
mas01cr@509
|
60 return adb->cached_lsh;
|
mas01cr@509
|
61 } else {
|
mas01cr@509
|
62 delete adb->cached_lsh;
|
mas01cr@509
|
63 }
|
mas01cr@509
|
64 }
|
mas01cr@509
|
65 lsh = new LSH(indexName, load_tables);
|
mas01cr@509
|
66 if(load_tables) {
|
mas01cr@509
|
67 adb->cached_lsh = lsh;
|
mas01cr@509
|
68 }
|
mas01cr@509
|
69 return lsh;
|
mas01cr@509
|
70 }
|
mas01cr@509
|
71
|
mas01cr@589
|
/*
 * Allocate a collection of sz shingles, each a vector of dim * seqLen
 * floats (value-initialised, i.e. zero).  The result is created with
 * new; release it with audiodb_index_delete_shingles().
 */
std::vector<std::vector<float> > *audiodb_index_initialize_shingles(uint32_t sz, uint32_t dim, uint32_t seqLen) {
  const std::vector<float> blankShingle(dim * seqLen);
  return new std::vector<std::vector<float> >(sz, blankShingle);
}
|
mas01cr@509
|
79
|
mas01cr@509
|
/*
 * Destroy a shingle collection with delete; a NULL pointer is a no-op.
 */
void audiodb_index_delete_shingles(std::vector<std::vector<float> > *vv) {
  delete vv;
}
|
mas01cr@509
|
83
|
mas01cr@589
|
/*
 * Fill shingle number idx of *vv from the feature data at fvp.
 *
 * For idx == 0 the whole shingle is converted from the doubles starting
 * at fvp.  For any later idx the shingle is the previous shingle shifted
 * left by dim elements, followed by dim new values read from fvp at
 * offset (seqLen + idx - 1) * dim.  Shingle idx - 1 must therefore have
 * been filled already.
 *
 * The number of elements written is the current size of (*vv)[idx].
 */
void audiodb_index_make_shingle(std::vector<std::vector<float> >* vv, uint32_t idx, double* fvp, uint32_t dim, uint32_t seqLen){
  std::vector<float> &shingle = (*vv)[idx];
  const size_t total = shingle.size();

  if(idx == 0) {
    // First shingle: take everything straight from the feature data
    for(size_t k = 0; k < total; k++) {
      shingle[k] = (float) fvp[k];
    }
    return;
  }

  // Later shingles: reuse the tail of the previous shingle ...
  const std::vector<float> &previous = (*vv)[idx - 1];
  const size_t carried = total - dim;
  for(size_t k = 0; k < carried; k++) {
    shingle[k] = previous[dim + k];
  }

  // ... and append the one feature vector that is new to this shingle
  const double *fresh = fvp + (seqLen + idx - 1) * dim;
  for(size_t k = carried; k < total; k++) {
    shingle[k] = (float) fresh[k - carried];
  }
}
|
mas01cr@509
|
108
|
mas01cr@509
|
109 // in-place norming, no deletions. If using power, return number of
|
mas01cr@509
|
110 // shingles above power threshold.
|
mas01cr@589
|
/*
 * Scale every shingle in *vv in place and count the shingles that pass
 * the power test.
 *
 *   vv              shingles to scale; the first seqLen * dim elements
 *                   of each one are multiplied by the scale factor
 *   snp             one value per shingle, read only when
 *                   normed_vectors is true; the shingle is then also
 *                   divided by this value
 *   spp             one power value per shingle; must not be NULL
 *   radius          squared radius: elements are divided by sqrt(radius)
 *   use_pthreshold  when true only shingles with power >= pthreshold
 *                   are counted, otherwise every shingle is counted
 *
 * Returns the count, or -1 -- without modifying any shingle -- if spp
 * is NULL or if normed_vectors is set and snp is NULL.  snp may be NULL
 * when normed_vectors is false, since it is not consulted in that case.
 */
int audiodb_index_norm_shingles(std::vector<std::vector<float> >* vv, double* snp, double* spp, uint32_t dim, uint32_t seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
  if(!spp) {
    return -1;
  }
  if(normed_vectors && !snp) {
    return -1;
  }

  int z = 0; // number of above-threshold shingles
  float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
  float scale = oneOverRadius;
  uint32_t shingleSize = seqLen * dim;

  for(uint32_t a = 0; a < (*vv).size(); a++) {
    if(normed_vectors) {
      float l2norm = (float)(*snp++);
      scale = (1./l2norm)*oneOverRadius;
    }

    for(uint32_t b = 0; b < shingleSize; b++) {
      (*vv)[a][b] *= scale;
    }

    double power = *spp++;
    if(!use_pthreshold || power >= pthreshold) {
      z++;
    }
  }
  return z;
}
|