Mercurial > hg > audiodb
comparison index.cpp @ 459:fcc6f7c4856b api-inversion
No more global shingle vector of vectors.
Convert audioDB::index_initialize_shingles and
audioDB::index_norm_shingles to plain old functions. In doing so, the
latter in particular acquires a silly argument list; we need that
complexity for now because it's called both from audioDB::query (which
we're currently inverting) and from audioDB::index (which is out of
scope for now).
The loss of the global vv thing made me check up on memory discipline
[hence the new API function audiodb_query_free_results() as well as the
internal audiodb_index_delete_shingles()]. It's not too bad, but there
are plenty of leaks to be found by those with time to run
AUDIODB="valgrind --leak-check=full ../../audioDB" sh ./run-test.sh
on their favourite test case. For example, the Radius reporters leak
one triple per hit.
(Honestly, C++ memory management is teh suck.)
author | mas01cr |
---|---|
date | Sun, 28 Dec 2008 22:43:50 +0000 |
parents | 913a95f06998 |
children | 17003dff8127 |
comparison
equal
deleted
inserted
replaced
458:913a95f06998 | 459:fcc6f7c4856b |
---|---|
60 } | 60 } |
61 assert(audioDB::lsh); | 61 assert(audioDB::lsh); |
62 return audioDB::lsh; | 62 return audioDB::lsh; |
63 } | 63 } |
64 | 64 |
65 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ | 65 vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) { |
66 if(vv) | 66 std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz); |
67 delete vv; | 67 for(Uns32T i=0 ; i < sz ; i++) { |
68 vv = new vector<vector<float> >(sz); | 68 (*vv)[i]=vector<float>(dim * seqLen); |
69 for(Uns32T i=0 ; i < sz ; i++) | 69 } |
70 (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage | |
71 return vv; | 70 return vv; |
71 } | |
72 | |
73 void audiodb_index_delete_shingles(vector<vector<float> > *vv) { | |
74 delete vv; | |
72 } | 75 } |
73 | 76 |
74 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/ | 77 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/ |
75 | 78 |
76 // Prepare the AudioDB database for read access and allocate auxiliary memory | 79 // Prepare the AudioDB database for read access and allocate auxiliary memory |
152 } | 155 } |
153 | 156 |
154 // norm shingles | 157 // norm shingles |
155 // in-place norming, no deletions | 158 // in-place norming, no deletions |
156 // If using power, return number of shingles above power threshold | 159 // If using power, return number of shingles above power threshold |
157 int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){ | 160 int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) { |
158 int z = 0; // number of above-threshold shingles | 161 int z = 0; // number of above-threshold shingles |
159 float l2norm; | 162 float l2norm; |
160 double power; | 163 double power; |
161 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 | 164 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 |
162 float oneOverSqrtl2NormDivRad = oneOverRadius; | 165 float oneOverSqrtl2NormDivRad = oneOverRadius; |
163 if(!spp) | 166 Uns32T shingleSize = seqLen * dim; |
164 error("LSH indexing and query requires a power feature using -w or -W"); | 167 |
165 Uns32T shingleSize = sequenceLength*dbH->dim; | 168 if(!spp) { |
169 return -1; | |
170 } | |
166 for(Uns32T a=0; a<(*vv).size(); a++){ | 171 for(Uns32T a=0; a<(*vv).size(); a++){ |
167 l2norm = (float)(*snp++); | 172 l2norm = (float)(*snp++); |
168 if(audioDB::normalizedDistance) | 173 if(normed_vectors) |
169 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; | 174 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; |
170 | 175 |
171 for(Uns32T b=0; b < shingleSize ; b++) | 176 for(Uns32T b=0; b < shingleSize ; b++) |
172 (*vv)[a][b]*=oneOverSqrtl2NormDivRad; | 177 (*vv)[a][b]*=oneOverSqrtl2NormDivRad; |
173 | 178 |
174 power = *spp++; | 179 power = *spp++; |
175 if(use_absolute_threshold){ | 180 if(use_pthreshold){ |
176 if ( power >= absolute_threshold ) | 181 if (power >= pthreshold) |
177 z++; | 182 z++; |
178 } | 183 } |
179 else | 184 else |
180 z++; | 185 z++; |
181 } | 186 } |
436 } | 441 } |
437 } | 442 } |
438 | 443 |
439 Uns32T numVecsAboveThreshold = 0, collisionCount = 0; | 444 Uns32T numVecsAboveThreshold = 0, collisionCount = 0; |
440 if(numVecs){ | 445 if(numVecs){ |
441 vv = index_initialize_shingles(numVecs); | 446 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(numVecs, dbH->dim, sequenceLength); |
442 | 447 |
443 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ ) | 448 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ ) |
444 audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength); | 449 audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength); |
445 | 450 int vcount = audiodb_index_norm_shingles(vv, *snpp, *sppp, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold); |
446 numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp); | 451 if(vcount == -1) { |
452 audiodb_index_delete_shingles(vv); | |
453 error("failed to norm shingles"); | |
454 } | |
455 numVecsAboveThreshold = vcount; | |
447 collisionCount = index_insert_shingles(vv, trackID, *sppp); | 456 collisionCount = index_insert_shingles(vv, trackID, *sppp); |
448 } | 457 audiodb_index_delete_shingles(vv); |
458 } | |
459 | |
449 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0; | 460 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0; |
450 | 461 |
451 /* index_norm_shingles() only goes as far as the end of the | 462 /* audiodb_index_norm_shingles() only goes as far as the end of the |
452 sequence, which is right, but the space allocated is for the | 463 sequence, which is right, but the space allocated is for the |
453 whole track. */ | 464 whole track. */ |
454 | 465 |
455 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN | 466 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN |
456 * So let's be certain the pointers are in the correct place | 467 * So let's be certain the pointers are in the correct place |
592 error("failed to set up qpointers"); | 603 error("failed to set up qpointers"); |
593 } | 604 } |
594 | 605 |
595 // query vector index | 606 // query vector index |
596 Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1; | 607 Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1; |
597 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles | 608 std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequenceLength); // allocate memory to copy query vectors to shingles |
598 | 609 |
599 // Construct shingles from query features | 610 // Construct shingles from query features |
600 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) | 611 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) |
601 audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength); | 612 audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength); |
602 | 613 |
603 // Normalize query vectors | 614 // Normalize query vectors |
604 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qpointers.l2norm, qpointers.power ); | 615 int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold); |
616 if(vcount == -1) { | |
617 audiodb_index_delete_shingles(vv); | |
618 error("failed to norm shingles"); | |
619 } | |
620 Uns32T numVecsAboveThreshold = vcount; | |
605 | 621 |
606 // Nq contains number of inspected points in query file, | 622 // Nq contains number of inspected points in query file, |
607 // numVecsAboveThreshold is number of points with power >= absolute_threshold | 623 // numVecsAboveThreshold is number of points with power >= absolute_threshold |
608 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation | 624 double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation |
609 if(usingQueryPoint && numVecsAboveThreshold){ | 625 if(usingQueryPoint && numVecsAboveThreshold){ |
619 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); | 635 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); |
620 } else { | 636 } else { |
621 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); | 637 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); |
622 } | 638 } |
623 } | 639 } |
640 audiodb_index_delete_shingles(vv); | |
624 | 641 |
625 if(lsh_exact) | 642 if(lsh_exact) |
626 // Perform exact distance computation on point pairs in exact_evaluation_queue | 643 // Perform exact distance computation on point pairs in exact_evaluation_queue |
627 query_loop_points(adb, spec, qstate, query, &qpointers); | 644 query_loop_points(adb, spec, qstate, query, &qpointers); |
628 | 645 |