mas01cr@509
|
1 extern "C" {
|
mas01cr@509
|
2 #include "audioDB_API.h"
|
mas01cr@509
|
3 }
|
mas01cr@509
|
4 #include "audioDB-internals.h"
|
mas01cr@509
|
5
|
mas01cr@509
|
6 /*
|
mas01cr@509
|
7 * Routines which are common to both indexed query and index creation:
|
mas01cr@509
|
8 * we put them in their own file for build logistics.
|
mas01cr@509
|
9 */
|
mas01cr@509
|
10
|
mas01cr@509
|
11 /* FIXME: there are several things wrong with this: the memory
|
mas01cr@509
|
12 * discipline isn't ideal, the radius printing is a bit lame, the name
|
mas01cr@509
|
13 * getting will succeed or fail depending on whether the path was
|
mas01cr@509
|
14 * relative or absolute -- but most importantly encoding all that
|
mas01cr@509
|
15 * information in a filename is going to lose: it's impossible to
|
mas01cr@509
|
16 * maintain backwards-compatibility. Instead we should probably store
|
mas01cr@509
|
17 * the index metadata inside the audiodb instance. */
|
mas01cr@509
|
18 char *audiodb_index_get_name(const char *dbName, double radius, Uns32T sequenceLength) {
|
mas01cr@509
|
19 char *indexName;
|
mas01cr@509
|
20 if(strlen(dbName) > (ADB_MAXSTR - 32)) {
|
mas01cr@509
|
21 return NULL;
|
mas01cr@509
|
22 }
|
mas01cr@509
|
23 indexName = new char[ADB_MAXSTR];
|
mas01cr@509
|
24 strncpy(indexName, dbName, ADB_MAXSTR);
|
mas01cr@509
|
25 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
|
mas01cr@509
|
26 return indexName;
|
mas01cr@509
|
27 }
|
mas01cr@509
|
28
|
mas01cr@509
|
29 bool audiodb_index_exists(const char *dbName, double radius, Uns32T sequenceLength) {
|
mas01cr@509
|
30 char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
|
mas01cr@509
|
31 if(!indexName) {
|
mas01cr@509
|
32 return false;
|
mas01cr@509
|
33 }
|
mas01cr@509
|
34 struct stat st;
|
mas01cr@509
|
35 if(stat(indexName, &st)) {
|
mas01cr@509
|
36 delete [] indexName;
|
mas01cr@509
|
37 return false;
|
mas01cr@509
|
38 }
|
mas01cr@509
|
39 /* FIXME: other stat checks here? */
|
mas01cr@509
|
40 /* FIXME: is there any better way to check whether we can open a
|
mas01cr@509
|
41 * file for reading than by opening a file for reading? */
|
mas01cr@509
|
42 int fd = open(indexName, O_RDONLY);
|
mas01cr@509
|
43 delete [] indexName;
|
mas01cr@509
|
44 if(fd < 0) {
|
mas01cr@509
|
45 return false;
|
mas01cr@509
|
46 } else {
|
mas01cr@509
|
47 close(fd);
|
mas01cr@509
|
48 return true;
|
mas01cr@509
|
49 }
|
mas01cr@509
|
50 }
|
mas01cr@509
|
51
|
mas01cr@509
|
52 /* FIXME: the indexName arg should be "const char *", but the LSH
|
mas01cr@509
|
53 * library doesn't like that.
|
mas01cr@509
|
54 */
|
mas01cr@509
|
55 LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
|
mas01cr@509
|
56 LSH *lsh;
|
mas01cr@509
|
57 if(adb->cached_lsh) {
|
mas01cr@509
|
58 if(!strncmp(adb->cached_lsh->get_indexName(), indexName, ADB_MAXSTR)) {
|
mas01cr@509
|
59 return adb->cached_lsh;
|
mas01cr@509
|
60 } else {
|
mas01cr@509
|
61 delete adb->cached_lsh;
|
mas01cr@509
|
62 }
|
mas01cr@509
|
63 }
|
mas01cr@509
|
64 lsh = new LSH(indexName, load_tables);
|
mas01cr@509
|
65 if(load_tables) {
|
mas01cr@509
|
66 adb->cached_lsh = lsh;
|
mas01cr@509
|
67 }
|
mas01cr@509
|
68 return lsh;
|
mas01cr@509
|
69 }
|
mas01cr@509
|
70
|
mas01cr@509
|
71 vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
|
mas01cr@509
|
72 std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz);
|
mas01cr@509
|
73 for(Uns32T i=0 ; i < sz ; i++) {
|
mas01cr@509
|
74 (*vv)[i]=vector<float>(dim * seqLen);
|
mas01cr@509
|
75 }
|
mas01cr@509
|
76 return vv;
|
mas01cr@509
|
77 }
|
mas01cr@509
|
78
|
mas01cr@509
|
79 void audiodb_index_delete_shingles(vector<vector<float> > *vv) {
|
mas01cr@509
|
80 delete vv;
|
mas01cr@509
|
81 }
|
mas01cr@509
|
82
|
mas01cr@509
|
83 void audiodb_index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
|
mas01cr@509
|
84 assert(idx<(*vv).size());
|
mas01cr@509
|
85 vector<float>::iterator ve = (*vv)[idx].end();
|
mas01cr@509
|
86 vector<float>::iterator vi = (*vv)[idx].begin();
|
mas01cr@509
|
87 // First feature vector in shingle
|
mas01cr@509
|
88 if(idx == 0) {
|
mas01cr@509
|
89 while(vi!=ve) {
|
mas01cr@509
|
90 *vi++ = (float)(*fvp++);
|
mas01cr@509
|
91 }
|
mas01cr@509
|
92 } else {
|
mas01cr@509
|
93 // Not first feature vector in shingle
|
mas01cr@509
|
94 vector<float>::iterator ui=(*vv)[idx-1].begin() + dim;
|
mas01cr@509
|
95 // Previous seqLen-1 dim-vectors
|
mas01cr@509
|
96 while(vi!=ve-dim) {
|
mas01cr@509
|
97 *vi++ = *ui++;
|
mas01cr@509
|
98 }
|
mas01cr@509
|
99 // Move data pointer to next feature vector
|
mas01cr@509
|
100 fvp += ( seqLen + idx - 1 ) * dim ;
|
mas01cr@509
|
101 // New d-vector
|
mas01cr@509
|
102 while(vi!=ve) {
|
mas01cr@509
|
103 *vi++ = (float)(*fvp++);
|
mas01cr@509
|
104 }
|
mas01cr@509
|
105 }
|
mas01cr@509
|
106 }
|
mas01cr@509
|
107
|
mas01cr@509
|
108 // in-place norming, no deletions. If using power, return number of
|
mas01cr@509
|
109 // shingles above power threshold.
|
mas01cr@509
|
110 int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
|
mas01cr@509
|
111 int z = 0; // number of above-threshold shingles
|
mas01cr@509
|
112 float l2norm;
|
mas01cr@509
|
113 double power;
|
mas01cr@509
|
114 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
|
mas01cr@509
|
115 float oneOverSqrtl2NormDivRad = oneOverRadius;
|
mas01cr@509
|
116 Uns32T shingleSize = seqLen * dim;
|
mas01cr@509
|
117
|
mas01cr@509
|
118 if(!spp) {
|
mas01cr@509
|
119 return -1;
|
mas01cr@509
|
120 }
|
mas01cr@509
|
121 for(Uns32T a=0; a<(*vv).size(); a++){
|
mas01cr@509
|
122 l2norm = (float)(*snp++);
|
mas01cr@509
|
123 if(normed_vectors)
|
mas01cr@509
|
124 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
|
mas01cr@509
|
125
|
mas01cr@509
|
126 for(Uns32T b=0; b < shingleSize ; b++)
|
mas01cr@509
|
127 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
|
mas01cr@509
|
128
|
mas01cr@509
|
129 power = *spp++;
|
mas01cr@509
|
130 if(use_pthreshold){
|
mas01cr@509
|
131 if (power >= pthreshold)
|
mas01cr@509
|
132 z++;
|
mas01cr@509
|
133 }
|
mas01cr@509
|
134 else
|
mas01cr@509
|
135 z++;
|
mas01cr@509
|
136 }
|
mas01cr@509
|
137 return z;
|
mas01cr@509
|
138 }
|