mas01mc@292
|
1 // LSH indexing
|
mas01mc@292
|
2 //
|
mas01mc@292
|
3 // Construct a persistent LSH table structure
|
mas01mc@292
|
4 // Store at the same location as dbName
|
mas01mc@292
|
5 // Naming convention:
|
mas01mc@292
|
6 // dbName.lsh.${radius}.${sequenceLength}
|
mas01mc@292
|
7 //
|
mas01mc@292
|
8 //
|
mas01mc@292
|
9 // Author: Michael Casey
|
mas01mc@292
|
10 // Date: 23 June 2008
|
mas01mc@292
|
11
|
mas01mc@292
|
12 #include "audioDB.h"
|
mas01mc@292
|
13 #include "ReporterBase.h"
|
mas01mc@292
|
14
|
mas01mc@292
|
15
|
mas01mc@292
|
16 /************************* LSH point index to audioDB conversion *****************/
|
mas01mc@292
|
17 Uns32T audioDB::index_to_trackID(Uns32T lshID){
|
mas01mc@292
|
18 return lshID>>LSH_N_POINT_BITS;
|
mas01mc@292
|
19 }
|
mas01mc@292
|
20
|
mas01mc@292
|
21 Uns32T audioDB::index_to_trackPos(Uns32T lshID){
|
mas01mc@292
|
22 return lshID&LSH_POINT_MASK;
|
mas01mc@292
|
23 }
|
mas01mc@292
|
24
|
mas01mc@292
|
25 Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos){
|
mas01mc@292
|
26 return (trackID << LSH_N_POINT_BITS) | spos;
|
mas01mc@292
|
27 }
|
mas01mc@292
|
28
|
mas01mc@292
|
29 /************************* LSH indexing and query initialization *****************/
|
mas01mc@292
|
30
|
mas01mc@292
|
31 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){
|
mas01mc@292
|
32 char* indexName = new char[MAXSTR];
|
mas01mc@292
|
33 // Attempt to make new file
|
mas01mc@292
|
34 if(strlen(dbName) > (MAXSTR - 32))
|
mas01mc@292
|
35 error("dbName is too long for LSH index filename appendages");
|
mas01mc@292
|
36 strncpy(indexName, dbName, MAXSTR);
|
mas01mc@292
|
37 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
|
mas01mc@292
|
38 return indexName;
|
mas01mc@292
|
39 }
|
mas01mc@292
|
40
|
mas01mc@292
|
41 // return true if index exists else return false
|
mas01mc@292
|
42 int audioDB::index_exists(const char* dbName, double radius, Uns32T sequenceLength){
|
mas01mc@292
|
43 // Test to see if file exists
|
mas01mc@292
|
44 char* indexName = index_get_name(dbName, radius, sequenceLength);
|
mas01mc@292
|
45 lshfid = open (indexName, O_RDONLY);
|
mas01mc@292
|
46 delete[] indexName;
|
mas01mc@292
|
47 close(lshfid);
|
mas01mc@292
|
48
|
mas01mc@292
|
49 if(lshfid<0)
|
mas01mc@292
|
50 return false;
|
mas01mc@292
|
51 else
|
mas01mc@292
|
52 return true;
|
mas01mc@292
|
53 }
|
mas01mc@292
|
54
|
mas01mc@308
|
55 LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){
|
mas01mc@308
|
56 LSH* gIndx=SERVER_LSH_INDEX_SINGLETON;
|
mas01mc@308
|
57 if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) )
|
mas01mc@308
|
58 audioDB::lsh = gIndx; // Use the global SERVER resident index
|
mas01mc@308
|
59 else{
|
mas01mc@308
|
60 if(audioDB::lsh)
|
mas01mc@308
|
61 delete audioDB::lsh;
|
mas01mc@308
|
62 audioDB::lsh = new LSH(indexName, load_hashTables);
|
mas01mc@308
|
63 }
|
mas01mc@308
|
64 assert(audioDB::lsh);
|
mas01mc@308
|
65 return audioDB::lsh;
|
mas01mc@308
|
66 }
|
mas01mc@308
|
67
|
mas01mc@292
|
68 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){
|
mas01mc@292
|
69 if(vv)
|
mas01mc@292
|
70 delete vv;
|
mas01mc@292
|
71 vv = new vector<vector<float> >(sz);
|
mas01mc@292
|
72 for(Uns32T i=0 ; i < sz ; i++)
|
mas01mc@292
|
73 (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage
|
mas01mc@292
|
74 return vv;
|
mas01mc@292
|
75 }
|
mas01mc@292
|
76
|
mas01mc@292
|
77 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/
|
mas01mc@292
|
78
|
mas01mc@292
|
79 // Prepare the AudioDB database for read access and allocate auxillary memory
|
mas01mc@292
|
80 void audioDB::index_initialize(double **snp, double **vsnp, double **spp, double **vspp, Uns32T *dvp) {
|
mas01mc@292
|
81 *dvp = dbH->length / (dbH->dim * sizeof(double)); // number of database vectors
|
mas01mc@292
|
82 *snp = new double[*dvp]; // songs norm pointer: L2 norm table for each vector
|
mas01mc@292
|
83
|
mas01mc@292
|
84 double *snpp = *snp, *sppp = 0;
|
mas01mc@292
|
85 memcpy(*snp, l2normTable, *dvp * sizeof(double));
|
mas01mc@292
|
86
|
mas01mc@292
|
87 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01mc@292
|
88 error("database not power-enabled", dbName);
|
mas01mc@292
|
89 }
|
mas01mc@292
|
90 *spp = new double[*dvp]; // song powertable pointer
|
mas01mc@292
|
91 sppp = *spp;
|
mas01mc@292
|
92 memcpy(*spp, powerTable, *dvp * sizeof(double));
|
mas01mc@292
|
93
|
mas01mc@292
|
94 for(Uns32T i = 0; i < dbH->numFiles; i++){
|
mas01mc@292
|
95 if(trackTable[i] >= sequenceLength) {
|
mas01mc@292
|
96 sequence_sum(snpp, trackTable[i], sequenceLength);
|
mas01mc@292
|
97 sequence_sqrt(snpp, trackTable[i], sequenceLength);
|
mas01mc@292
|
98
|
mas01mc@292
|
99 sequence_sum(sppp, trackTable[i], sequenceLength);
|
mas01mc@292
|
100 sequence_average(sppp, trackTable[i], sequenceLength);
|
mas01mc@292
|
101 }
|
mas01mc@292
|
102 snpp += trackTable[i];
|
mas01mc@292
|
103 sppp += trackTable[i];
|
mas01mc@292
|
104 }
|
mas01mc@292
|
105
|
mas01mc@292
|
106 *vsnp = *snp;
|
mas01mc@292
|
107 *vspp = *spp;
|
mas01mc@292
|
108
|
mas01mc@292
|
109 // Move the feature vector read pointer to start of fetures in database
|
mas01mc@292
|
110 lseek(dbfid, dbH->dataOffset, SEEK_SET);
|
mas01mc@292
|
111 }
|
mas01mc@292
|
112
|
mas01mc@292
|
113
|
mas01mc@292
|
114 /************************ LSH indexing ***********************************/
|
mas01mc@292
|
115 void audioDB::index_index_db(const char* dbName){
|
mas01mc@292
|
116
|
mas01mc@292
|
117 char* newIndexName;
|
mas01mc@292
|
118 double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
|
mas01mc@292
|
119 Uns32T dbVectors = 0;
|
mas01mc@292
|
120
|
mas01mc@292
|
121 printf("INDEX: initializing header\n");
|
mas01mc@292
|
122 // Check if audioDB exists, initialize header and open database for read
|
mas01mc@292
|
123 forWrite = false;
|
mas01mc@292
|
124 initDBHeader(dbName);
|
mas01mc@292
|
125
|
mas01mc@292
|
126 newIndexName = index_get_name(dbName, radius, sequenceLength);
|
mas01mc@292
|
127
|
mas01mc@292
|
128 // Set unit norming flag override
|
mas01mc@292
|
129 audioDB::normalizedDistance = !audioDB::no_unit_norming;
|
mas01mc@292
|
130
|
mas01mc@292
|
131 printf("INDEX: dim %d\n", dbH->dim);
|
mas01mc@292
|
132 printf("INDEX: R %f\n", radius);
|
mas01mc@292
|
133 printf("INDEX: seqlen %d\n", sequenceLength);
|
mas01mc@292
|
134 printf("INDEX: lsh_w %f\n", lsh_param_w);
|
mas01mc@292
|
135 printf("INDEX: lsh_k %d\n", lsh_param_k);
|
mas01mc@292
|
136 printf("INDEX: lsh_m %d\n", lsh_param_m);
|
mas01mc@292
|
137 printf("INDEX: lsh_N %d\n", lsh_param_N);
|
mas01mc@296
|
138 printf("INDEX: lsh_C %d\n", lsh_param_ncols);
|
mas01mc@292
|
139 printf("INDEX: lsh_b %d\n", lsh_param_b);
|
mas01mc@292
|
140 printf("INDEX: normalized? %s\n", normalizedDistance?"true":"false");
|
mas01mc@292
|
141 fflush(stdout);
|
mas01mc@292
|
142
|
mas01mc@292
|
143
|
mas01mc@292
|
144 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
|
mas01mc@292
|
145
|
mas01mc@292
|
146 if((lshfid = open(newIndexName,O_RDONLY))<0){
|
mas01mc@292
|
147 printf("INDEX: constructing new LSH index\n");
|
mas01mc@292
|
148 printf("INDEX: making index file %s\n", newIndexName);
|
mas01mc@292
|
149 fflush(stdout);
|
mas01mc@292
|
150 // Construct new LSH index
|
mas01mc@292
|
151 lsh = new LSH((float)lsh_param_w, lsh_param_k,
|
mas01mc@292
|
152 lsh_param_m,
|
mas01mc@292
|
153 (Uns32T)(sequenceLength*dbH->dim),
|
mas01mc@292
|
154 lsh_param_N,
|
mas01mc@292
|
155 lsh_param_ncols,
|
mas01mc@292
|
156 (float)radius);
|
mas01mc@292
|
157 assert(lsh);
|
mas01mc@292
|
158
|
mas01mc@292
|
159 Uns32T endTrack = lsh_param_b;
|
mas01mc@292
|
160 if( endTrack > dbH->numFiles)
|
mas01mc@292
|
161 endTrack = dbH->numFiles;
|
mas01mc@292
|
162 // Insert up to lsh_param_b tracks
|
mas01mc@292
|
163 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
|
mas01mc@292
|
164 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
|
mas01mc@292
|
165
|
mas01mc@292
|
166 // Clean up
|
mas01mc@292
|
167 delete lsh;
|
mas01mc@308
|
168 lsh = 0;
|
mas01mc@292
|
169 close(lshfid);
|
mas01mc@292
|
170 }
|
mas01mc@292
|
171
|
mas01mc@292
|
172 // Attempt to open LSH file
|
mas01mc@292
|
173 if((lshfid = open(newIndexName,O_RDONLY))>0){
|
mas01mc@292
|
174 printf("INDEX: merging with existing LSH index\n");
|
mas01mc@292
|
175 fflush(stdout);
|
mas01mc@292
|
176
|
mas01mc@292
|
177 // Get the lsh header info and find how many tracks are inserted already
|
mas01mc@292
|
178 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here
|
mas01mc@292
|
179 assert(lsh);
|
mas01mc@292
|
180 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1;
|
mas01mc@292
|
181 delete lsh;
|
mas01mc@308
|
182 lsh = 0;
|
mas01mc@292
|
183
|
mas01mc@292
|
184 // This allows for updating index after more tracks are inserted into audioDB
|
mas01mc@292
|
185 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
|
mas01mc@292
|
186
|
mas01mc@292
|
187 Uns32T endTrack = startTrack + lsh_param_b;
|
mas01mc@292
|
188 if( endTrack > dbH->numFiles)
|
mas01mc@292
|
189 endTrack = dbH->numFiles;
|
mas01mc@292
|
190 printf("Indexing track range: %d - %d\n", startTrack, endTrack);
|
mas01mc@292
|
191 fflush(stdout);
|
mas01mc@292
|
192 lsh = new LSH(newIndexName, lsh_in_core); // Initialize core memory for LSH tables
|
mas01mc@292
|
193 assert(lsh);
|
mas01mc@292
|
194
|
mas01mc@292
|
195 // Insert up to lsh_param_b database tracks
|
mas01mc@292
|
196 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
|
mas01mc@292
|
197
|
mas01mc@292
|
198 // Serialize to file
|
mas01mc@292
|
199 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
|
mas01mc@292
|
200 delete lsh;
|
mas01mc@308
|
201 lsh = 0;
|
mas01mc@292
|
202 }
|
mas01mc@292
|
203
|
mas01mc@292
|
204 close(lshfid);
|
mas01mc@292
|
205 printf("INDEX: done constructing LSH index.\n");
|
mas01mc@292
|
206 fflush(stdout);
|
mas01mc@292
|
207
|
mas01mc@292
|
208 }
|
mas01mc@292
|
209 else{
|
mas01mc@292
|
210 error("Something's wrong with LSH index file");
|
mas01mc@292
|
211 exit(1);
|
mas01mc@292
|
212 }
|
mas01mc@292
|
213
|
mas01mc@292
|
214
|
mas01mc@292
|
215 delete[] newIndexName;
|
mas01mc@292
|
216 if(sNorm)
|
mas01mc@292
|
217 delete[] sNorm;
|
mas01mc@292
|
218 if(sPower)
|
mas01mc@292
|
219 delete[] sPower;
|
mas01mc@292
|
220
|
mas01mc@292
|
221
|
mas01mc@292
|
222 }
|
mas01mc@292
|
223
|
mas01mc@292
|
224 void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track,
|
mas01mc@292
|
225 double** fvpp, double** sNormpp,double** snPtrp,
|
mas01mc@292
|
226 double** sPowerp, double** spPtrp){
|
mas01mc@292
|
227 size_t nfv = 0;
|
mas01mc@292
|
228 double* fvp = 0; // Keep pointer for memory allocation and free() for track data
|
mas01mc@292
|
229 Uns32T trackID = 0;
|
mas01mc@292
|
230
|
mas01mc@292
|
231 VERB_LOG(1, "indexing tracks...");
|
mas01mc@292
|
232
|
mas01mc@292
|
233
|
mas01mc@292
|
234 for(trackID = start_track ; trackID < end_track ; trackID++ ){
|
mas01mc@292
|
235 read_data(trackID, &fvp, &nfv); // over-writes fvp and nfv
|
mas01mc@292
|
236 *fvpp = fvp; // Protect memory allocation and free() for track data
|
mas01mc@292
|
237 if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
|
mas01mc@292
|
238 break;
|
mas01mc@292
|
239 }
|
mas01mc@292
|
240 std::cout << "finished inserting." << endl;
|
mas01mc@292
|
241 }
|
mas01mc@292
|
242
|
mas01mc@292
|
243 int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){
|
mas01mc@292
|
244 // Loop over the current input track's vectors
|
mas01cr@305
|
245 Uns32T numVecs = 0;
|
mas01cr@305
|
246 if (trackTable[trackID] > O2_MAXTRACKLEN) {
|
mas01cr@305
|
247 if (O2_MAXTRACKLEN < sequenceLength - 1) {
|
mas01cr@305
|
248 numVecs = 0;
|
mas01cr@305
|
249 } else {
|
mas01cr@305
|
250 numVecs = O2_MAXTRACKLEN - sequenceLength + 1;
|
mas01cr@305
|
251 }
|
mas01cr@305
|
252 } else {
|
mas01cr@305
|
253 if (trackTable[trackID] < sequenceLength - 1) {
|
mas01cr@305
|
254 numVecs = 0;
|
mas01cr@305
|
255 } else {
|
mas01cr@305
|
256 numVecs = trackTable[trackID] - sequenceLength + 1;
|
mas01cr@305
|
257 }
|
mas01cr@305
|
258 }
|
mas01mc@292
|
259 vv = index_initialize_shingles(numVecs);
|
mas01mc@292
|
260
|
mas01mc@292
|
261 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
|
mas01mc@292
|
262 index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
|
mas01mc@292
|
263
|
mas01mc@292
|
264 Uns32T numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
|
mas01mc@292
|
265 Uns32T collisionCount = index_insert_shingles(vv, trackID, *sppp);
|
mas01mc@292
|
266 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
|
mas01mc@292
|
267
|
mas01mc@292
|
268 /* index_norm_shingles() only goes as far as the end of the
|
mas01mc@292
|
269 sequence, which is right, but the space allocated is for the
|
mas01mc@292
|
270 whole track. */
|
mas01mc@292
|
271
|
mas01mc@292
|
272 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
|
mas01mc@292
|
273 * So let's be certain the pointers are in the correct place
|
mas01mc@292
|
274 */
|
mas01mc@292
|
275
|
mas01mc@292
|
276 *snpp += trackTable[trackID];
|
mas01mc@292
|
277 *sppp += trackTable[trackID];
|
mas01mc@292
|
278 *fvpp += trackTable[trackID] * dbH->dim;
|
mas01mc@292
|
279
|
mas01mc@292
|
280 std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl;
|
mas01mc@292
|
281 std::cout.flush();
|
mas01mc@292
|
282 return true;
|
mas01mc@292
|
283 }
|
mas01mc@292
|
284
|
mas01mc@292
|
285 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
|
mas01mc@292
|
286 Uns32T collisionCount = 0;
|
mas01mc@292
|
287 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
|
mas01mc@311
|
288 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop)
|
mas01mc@311
|
289 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold))){
|
mas01mc@292
|
290 collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID));
|
mas01mc@311
|
291 spp+=sequenceHop;
|
mas01mc@311
|
292 }
|
mas01mc@292
|
293 return collisionCount;
|
mas01mc@292
|
294 }
|
mas01mc@292
|
295
|
mas01mc@292
|
296 /********************* LSH shingle construction ***************************/
|
mas01mc@292
|
297
|
mas01mc@292
|
298 // Construct shingles out of a feature matrix
|
mas01mc@292
|
299 // inputs:
|
mas01mc@292
|
300 // idx is vector index in feature matrix
|
mas01mc@292
|
301 // fvp is base feature matrix pointer double* [numVecs x dbH->dim]
|
mas01mc@292
|
302 //
|
mas01mc@292
|
303 // pre-conditions:
|
mas01mc@292
|
304 // dbH->dim
|
mas01mc@292
|
305 // sequenceLength
|
mas01mc@292
|
306 // idx < numVectors - sequenceLength + 1
|
mas01mc@292
|
307 //
|
mas01mc@292
|
308 // post-conditions:
|
mas01mc@292
|
309 // (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values
|
mas01mc@292
|
310
|
mas01mc@292
|
311 void audioDB::index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
|
mas01mc@292
|
312 assert(idx<(*vv).size());
|
mas01mc@292
|
313 vector<float>::iterator ve = (*vv)[idx].end();
|
mas01mc@292
|
314 vi=(*vv)[idx].begin(); // shingle iterator
|
mas01mc@292
|
315 // First feature vector in shingle
|
mas01mc@292
|
316 if(idx==0){
|
mas01mc@292
|
317 while(vi!=ve)
|
mas01mc@292
|
318 *vi++ = (float)(*fvp++);
|
mas01mc@292
|
319 }
|
mas01mc@292
|
320 // Not first feature vector in shingle
|
mas01mc@292
|
321 else{
|
mas01mc@292
|
322 vector<float>::iterator ui=(*vv)[idx-1].begin() + dim; // previous shingle iterator
|
mas01mc@292
|
323 // Previous seqLen-1 dim-vectors
|
mas01mc@292
|
324 while(vi!=ve-dim)
|
mas01mc@292
|
325 *vi++=*ui++;
|
mas01mc@292
|
326 // Move data pointer to next feature vector
|
mas01mc@292
|
327 fvp += ( seqLen + idx - 1 ) * dim ;
|
mas01mc@292
|
328 // New d-vector
|
mas01mc@292
|
329 while(vi!=ve)
|
mas01mc@292
|
330 *vi++ = (float)(*fvp++);
|
mas01mc@292
|
331 }
|
mas01mc@292
|
332 }
|
mas01mc@292
|
333
|
mas01mc@292
|
334 // norm shingles
|
mas01mc@292
|
335 // in-place norming, no deletions
|
mas01mc@292
|
336 // If using power, return number of shingles above power threshold
|
mas01mc@292
|
337 int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){
|
mas01mc@292
|
338 int z = 0; // number of above-threshold shingles
|
mas01mc@292
|
339 float l2norm;
|
mas01mc@292
|
340 double power;
|
mas01mc@292
|
341 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
|
mas01mc@292
|
342 float oneOverSqrtl2NormDivRad = oneOverRadius;
|
mas01mc@292
|
343 if(!spp)
|
mas01mc@292
|
344 error("LSH indexing and query requires a power feature using -w or -W");
|
mas01mc@292
|
345 Uns32T shingleSize = sequenceLength*dbH->dim;
|
mas01mc@292
|
346 for(Uns32T a=0; a<(*vv).size(); a++){
|
mas01mc@292
|
347 l2norm = (float)(*snp++);
|
mas01mc@292
|
348 if(audioDB::normalizedDistance)
|
mas01mc@292
|
349 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
|
mas01mc@292
|
350
|
mas01mc@292
|
351 for(Uns32T b=0; b < shingleSize ; b++)
|
mas01mc@292
|
352 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
|
mas01mc@292
|
353
|
mas01mc@292
|
354 power = *spp++;
|
mas01mc@292
|
355 if(use_absolute_threshold){
|
mas01mc@292
|
356 if ( power >= absolute_threshold )
|
mas01mc@292
|
357 z++;
|
mas01mc@292
|
358 }
|
mas01mc@292
|
359 else
|
mas01mc@292
|
360 z++;
|
mas01mc@292
|
361 }
|
mas01mc@292
|
362 return z;
|
mas01mc@292
|
363 }
|
mas01mc@292
|
364
|
mas01mc@292
|
365
|
mas01mc@292
|
366 /*********************** LSH retrieval ****************************/
|
mas01mc@292
|
367
|
mas01mc@292
|
368
|
mas01mc@292
|
369 // return true if indexed query performed else return false
|
mas01mc@292
|
370 int audioDB::index_init_query(const char* dbName){
|
mas01mc@292
|
371
|
mas01mc@292
|
372 if(!(index_exists(dbName, radius, sequenceLength)))
|
mas01mc@292
|
373 return false;
|
mas01mc@292
|
374
|
mas01mc@292
|
375 char* indexName = index_get_name(dbName, radius, sequenceLength);
|
mas01mc@292
|
376
|
mas01mc@292
|
377 // Test to see if file exists
|
mas01mc@292
|
378 if((lshfid = open (indexName, O_RDONLY)) < 0){
|
mas01mc@292
|
379 delete[] indexName;
|
mas01mc@292
|
380 return false;
|
mas01mc@292
|
381 }
|
mas01mc@292
|
382
|
mas01mc@308
|
383 lsh = index_allocate(indexName, false); // Get the header only here
|
mas01mc@292
|
384 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
|
mas01mc@292
|
385
|
mas01mc@311
|
386 if(lsh!=SERVER_LSH_INDEX_SINGLETON){
|
mas01mc@308
|
387 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
|
mas01mc@308
|
388 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
|
mas01mc@308
|
389 printf("INDEX: dim %d\n", dbH->dim);
|
mas01mc@308
|
390 printf("INDEX: R %f\n", lsh->get_radius());
|
mas01mc@308
|
391 printf("INDEX: seqlen %d\n", sequenceLength);
|
mas01mc@308
|
392 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
|
mas01mc@308
|
393 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
|
mas01mc@308
|
394 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
|
mas01mc@308
|
395 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
|
mas01mc@308
|
396 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp()));
|
mas01mc@308
|
397 printf("INDEX: Opened LSH index file %s\n", indexName);
|
mas01mc@308
|
398 fflush(stdout);
|
mas01mc@308
|
399 }
|
mas01mc@292
|
400
|
mas01mc@292
|
401 // Check to see if we are loading hash tables into core, and do so if true
|
mas01mc@292
|
402 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
|
mas01mc@308
|
403 if(SERVER_LSH_INDEX_SINGLETON)
|
mas01mc@308
|
404 fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName());
|
mas01mc@308
|
405 else
|
mas01mc@308
|
406 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1");
|
mas01mc@308
|
407 lsh = index_allocate(indexName, true);
|
mas01mc@292
|
408 }
|
mas01mc@292
|
409
|
mas01mc@292
|
410 delete[] indexName;
|
mas01mc@292
|
411 return true;
|
mas01mc@292
|
412 }
|
mas01mc@292
|
413
|
mas01mc@292
|
414 // *Static* approximate NN point reporter callback method for lshlib
|
mas01mc@292
|
415 void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
|
mas01mc@292
|
416 assert(instancePtr); // We need an instance for this callback
|
mas01mc@292
|
417 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
|
mas01mc@292
|
418 Uns32T trackID = index_to_trackID(pointID);
|
mas01mc@292
|
419 Uns32T spos = index_to_trackPos(pointID);
|
mas01mc@292
|
420 // Skip identity in query_from_key
|
mas01mc@292
|
421 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
|
mas01mc@292
|
422 myself->reporter->add_point(trackID, qpos, spos, dist);
|
mas01mc@292
|
423 }
|
mas01mc@292
|
424
|
mas01mc@292
|
425 // *Static* exact NN point reporter callback method for lshlib
|
mas01mc@292
|
426 // Maintain a queue of points to pass to query_points() for exact evaluation
|
mas01mc@292
|
427 void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
|
mas01mc@292
|
428 assert(instancePtr); // We need an instance for this callback
|
mas01mc@292
|
429 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
|
mas01mc@292
|
430 Uns32T trackID = index_to_trackID(pointID);
|
mas01mc@292
|
431 Uns32T spos = index_to_trackPos(pointID);
|
mas01mc@292
|
432 // Skip identity in query_from_key
|
mas01mc@292
|
433 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
|
mas01mc@292
|
434 myself->index_insert_exact_evaluation_queue(trackID, qpos, spos);
|
mas01mc@292
|
435 }
|
mas01mc@292
|
436
|
mas01mc@292
|
437 void audioDB::initialize_exact_evalutation_queue(){
|
mas01mc@292
|
438 if(exact_evaluation_queue)
|
mas01mc@292
|
439 delete exact_evaluation_queue;
|
mas01mc@292
|
440 exact_evaluation_queue = new priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >;
|
mas01mc@292
|
441 }
|
mas01mc@292
|
442
|
mas01mc@292
|
443 void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){
|
mas01mc@292
|
444 PointPair p(trackID, qpos, spos);
|
mas01mc@292
|
445 exact_evaluation_queue->push(p);
|
mas01mc@292
|
446 }
|
mas01mc@292
|
447
|
mas01mc@292
|
448 // return 0: if index does not exist
|
mas01mc@292
|
449 // return nqv: if index exists
|
mas01mc@292
|
450 int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) {
|
mas01mc@292
|
451
|
mas01mc@292
|
452 unsigned int numVectors;
|
mas01mc@292
|
453 double *query, *query_data;
|
mas01mc@292
|
454 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
|
mas01mc@292
|
455 double meanQdur;
|
mas01mc@292
|
456 void (*add_point_func)(void*,Uns32T,Uns32T,float);
|
mas01mc@292
|
457
|
mas01mc@292
|
458 // Set the point-reporter callback based on the value of lsh_exact
|
mas01mc@292
|
459 if(lsh_exact){
|
mas01mc@292
|
460 initialize_exact_evalutation_queue();
|
mas01mc@292
|
461 add_point_func = &index_add_point_exact;
|
mas01mc@292
|
462 }
|
mas01mc@292
|
463 else
|
mas01mc@292
|
464 add_point_func = &index_add_point_approximate;
|
mas01mc@292
|
465
|
mas01mc@292
|
466 if(!index_init_query(dbName)) // sets-up LSH index structures for querying
|
mas01mc@292
|
467 return 0;
|
mas01mc@292
|
468
|
mas01mc@292
|
469 char* database = index_get_name(dbName, radius, sequenceLength);
|
mas01mc@292
|
470
|
mas01mc@292
|
471 if(query_from_key)
|
mas01mc@292
|
472 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
|
mas01mc@292
|
473 else
|
mas01mc@292
|
474 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); // get query vectors
|
mas01mc@292
|
475
|
mas01mc@292
|
476 VERB_LOG(1, "retrieving tracks...");
|
mas01mc@292
|
477
|
mas01mc@292
|
478 assert(pointNN>0 && pointNN<=O2_MAXNN);
|
mas01mc@292
|
479 assert(trackNN>0 && trackNN<=O2_MAXNN);
|
mas01mc@292
|
480
|
mas01mc@292
|
481 gettimeofday(&tv1, NULL);
|
mas01mc@292
|
482 // query vector index
|
mas01mc@292
|
483 Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1;
|
mas01mc@292
|
484 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles
|
mas01mc@292
|
485 cout << "Nq=" << Nq; cout.flush();
|
mas01mc@292
|
486 // Construct shingles from query features
|
mas01mc@292
|
487 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ )
|
mas01mc@292
|
488 index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
|
mas01mc@292
|
489
|
mas01mc@292
|
490 // Normalize query vectors
|
mas01mc@292
|
491 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qnPtr, qpPtr );
|
mas01mc@292
|
492 cout << " Nq'=" << numVecsAboveThreshold << endl; cout.flush();
|
mas01mc@292
|
493
|
mas01mc@292
|
494 // Nq contains number of inspected points in query file,
|
mas01mc@292
|
495 // numVecsAboveThreshold is number of points with power >= absolute_threshold
|
mas01mc@292
|
496 double* qpp = qpPtr; // Keep original qpPtr for possible exact evaluation
|
mas01mc@292
|
497 if(usingQueryPoint && numVecsAboveThreshold){
|
mas01mc@292
|
498 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
|
mas01mc@292
|
499 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this);
|
mas01mc@292
|
500 else
|
mas01mc@292
|
501 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this);
|
mas01mc@292
|
502 }
|
mas01mc@292
|
503 else if(numVecsAboveThreshold)
|
mas01mc@292
|
504 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ )
|
mas01mc@292
|
505 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold)))
|
mas01mc@292
|
506 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
|
mas01mc@292
|
507 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this);
|
mas01mc@292
|
508 else
|
mas01mc@292
|
509 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this);
|
mas01mc@292
|
510
|
mas01mc@292
|
511 if(lsh_exact)
|
mas01mc@292
|
512 // Perform exact distance computation on point pairs in exact_evaluation_queue
|
mas01mc@292
|
513 query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors);
|
mas01mc@292
|
514
|
mas01mc@292
|
515 gettimeofday(&tv2,NULL);
|
mas01mc@292
|
516 VERB_LOG(1,"elapsed time: %ld msec\n",
|
mas01mc@292
|
517 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
|
mas01mc@292
|
518 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
|
mas01mc@292
|
519
|
mas01mc@292
|
520 // Close the index file
|
mas01mc@292
|
521 close(lshfid);
|
mas01mc@292
|
522
|
mas01mc@292
|
523 // Clean up
|
mas01mc@292
|
524 if(query_data)
|
mas01mc@292
|
525 delete[] query_data;
|
mas01mc@292
|
526 if(qNorm)
|
mas01mc@292
|
527 delete[] qNorm;
|
mas01mc@292
|
528 if(qPower)
|
mas01mc@292
|
529 delete[] qPower;
|
mas01mc@292
|
530 if(database)
|
mas01mc@292
|
531 delete[] database;
|
mas01mc@292
|
532
|
mas01mc@292
|
533 return Nq;
|
mas01mc@292
|
534 }
|
mas01mc@292
|
535
|