comparison index.cpp @ 459:fcc6f7c4856b api-inversion

No more global shingle vector of vectors. Convert audioDB::index_initialize_shingles and audioDB::index_norm_shingles to plain old functions. In doing so, the latter in particular acquires a silly argument list; we need that complexity for now because it's called both from audioDB::query (which we're currently inverting) and from audioDB::index (which is out of scope for now). The loss of the global vv made me check up on memory discipline [hence the new API function audiodb_query_free_results() as well as the internal audiodb_index_delete_shingles()]. It's not too bad, but there are plenty of leaks for those with time to run AUDIODB="valgrind --leak-check=full ../../audioDB" sh ./run-test.sh on their favourite test case. For example, the Radius reporters leak one triple per hit. (Honestly, C++ memory management is the suck.)
author mas01cr
date Sun, 28 Dec 2008 22:43:50 +0000
parents 913a95f06998
children 17003dff8127
comparison
equal deleted inserted replaced
458:913a95f06998 459:fcc6f7c4856b
60 } 60 }
61 assert(audioDB::lsh); 61 assert(audioDB::lsh);
62 return audioDB::lsh; 62 return audioDB::lsh;
63 } 63 }
64 64
65 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ 65 vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
66 if(vv) 66 std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz);
67 delete vv; 67 for(Uns32T i=0 ; i < sz ; i++) {
68 vv = new vector<vector<float> >(sz); 68 (*vv)[i]=vector<float>(dim * seqLen);
69 for(Uns32T i=0 ; i < sz ; i++) 69 }
70 (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage
71 return vv; 70 return vv;
71 }
72
73 void audiodb_index_delete_shingles(vector<vector<float> > *vv) {
74 delete vv;
72 } 75 }
73 76
74 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/ 77 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/
75 78
76 // Prepare the AudioDB database for read access and allocate auxiliary memory 79 // Prepare the AudioDB database for read access and allocate auxiliary memory
152 } 155 }
153 156
154 // norm shingles 157 // norm shingles
155 // in-place norming, no deletions 158 // in-place norming, no deletions
156 // If using power, return number of shingles above power threshold 159 // If using power, return number of shingles above power threshold
157 int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){ 160 int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
158 int z = 0; // number of above-threshold shingles 161 int z = 0; // number of above-threshold shingles
159 float l2norm; 162 float l2norm;
160 double power; 163 double power;
161 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 164 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
162 float oneOverSqrtl2NormDivRad = oneOverRadius; 165 float oneOverSqrtl2NormDivRad = oneOverRadius;
163 if(!spp) 166 Uns32T shingleSize = seqLen * dim;
164 error("LSH indexing and query requires a power feature using -w or -W"); 167
165 Uns32T shingleSize = sequenceLength*dbH->dim; 168 if(!spp) {
169 return -1;
170 }
166 for(Uns32T a=0; a<(*vv).size(); a++){ 171 for(Uns32T a=0; a<(*vv).size(); a++){
167 l2norm = (float)(*snp++); 172 l2norm = (float)(*snp++);
168 if(audioDB::normalizedDistance) 173 if(normed_vectors)
169 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; 174 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
170 175
171 for(Uns32T b=0; b < shingleSize ; b++) 176 for(Uns32T b=0; b < shingleSize ; b++)
172 (*vv)[a][b]*=oneOverSqrtl2NormDivRad; 177 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
173 178
174 power = *spp++; 179 power = *spp++;
175 if(use_absolute_threshold){ 180 if(use_pthreshold){
176 if ( power >= absolute_threshold ) 181 if (power >= pthreshold)
177 z++; 182 z++;
178 } 183 }
179 else 184 else
180 z++; 185 z++;
181 } 186 }
436 } 441 }
437 } 442 }
438 443
439 Uns32T numVecsAboveThreshold = 0, collisionCount = 0; 444 Uns32T numVecsAboveThreshold = 0, collisionCount = 0;
440 if(numVecs){ 445 if(numVecs){
441 vv = index_initialize_shingles(numVecs); 446 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(numVecs, dbH->dim, sequenceLength);
442 447
443 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ ) 448 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
444 audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength); 449 audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
445 450 int vcount = audiodb_index_norm_shingles(vv, *snpp, *sppp, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold);
446 numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp); 451 if(vcount == -1) {
452 audiodb_index_delete_shingles(vv);
453 error("failed to norm shingles");
454 }
455 numVecsAboveThreshold = vcount;
447 collisionCount = index_insert_shingles(vv, trackID, *sppp); 456 collisionCount = index_insert_shingles(vv, trackID, *sppp);
448 } 457 audiodb_index_delete_shingles(vv);
458 }
459
449 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0; 460 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
450 461
451 /* index_norm_shingles() only goes as far as the end of the 462 /* audiodb_index_norm_shingles() only goes as far as the end of the
452 sequence, which is right, but the space allocated is for the 463 sequence, which is right, but the space allocated is for the
453 whole track. */ 464 whole track. */
454 465
455 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN 466 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
456 * So let's be certain the pointers are in the correct place 467 * So let's be certain the pointers are in the correct place
592 error("failed to set up qpointers"); 603 error("failed to set up qpointers");
593 } 604 }
594 605
595 // query vector index 606 // query vector index
596 Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1; 607 Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1;
597 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles 608 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequenceLength); // allocate memory to copy query vectors to shingles
598 609
599 // Construct shingles from query features 610 // Construct shingles from query features
600 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) 611 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ )
601 audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength); 612 audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
602 613
603 // Normalize query vectors 614 // Normalize query vectors
604 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qpointers.l2norm, qpointers.power ); 615 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold);
616 if(vcount == -1) {
617 audiodb_index_delete_shingles(vv);
618 error("failed to norm shingles");
619 }
620 Uns32T numVecsAboveThreshold = vcount;
605 621
606 // Nq contains number of inspected points in query file, 622 // Nq contains number of inspected points in query file,
607 // numVecsAboveThreshold is number of points with power >= absolute_threshold 623 // numVecsAboveThreshold is number of points with power >= absolute_threshold
608 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation 624 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
609 if(usingQueryPoint && numVecsAboveThreshold){ 625 if(usingQueryPoint && numVecsAboveThreshold){
619 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); 635 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
620 } else { 636 } else {
621 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); 637 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);
622 } 638 }
623 } 639 }
640 audiodb_index_delete_shingles(vv);
624 641
625 if(lsh_exact) 642 if(lsh_exact)
626 // Perform exact distance computation on point pairs in exact_evaluation_queue 643 // Perform exact distance computation on point pairs in exact_evaluation_queue
627 query_loop_points(adb, spec, qstate, query, &qpointers); 644 query_loop_points(adb, spec, qstate, query, &qpointers);
628 645