Mercurial > hg > audiodb
changeset 610:e21a3db643af
MORE MEMORY SANITY
Move the logic tracking which points have been visited already
(including the std::set datastructure) into the indexed query codepaths,
rather than inside accumulators. This has the effect of drastically
reducing the memory used in non-indexed queries, such that the working
set for a 500-file database with 100000 vectors total goes from 1.2GB to
slightly under 3MB.
All this and less code, too!
author | mas01cr |
---|---|
date | Fri, 28 Aug 2009 17:14:06 +0000 |
parents | 368320b31db6 |
children | 957dd4fb2599 |
files | audioDB-internals.h dbaccumulator.h nearestaccumulator.h pertrackaccumulator.h query-indexed.cpp query.cpp |
diffstat | 6 files changed, 55 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB-internals.h Fri Aug 21 15:23:32 2009 +0000 +++ b/audioDB-internals.h Fri Aug 28 17:14:06 2009 +0000 @@ -67,21 +67,6 @@ double *mean_duration; } adb_qpointers_internal_t; -/* this struct is for maintaining per-query state. We don't want to - * store this stuff in the adb struct itself, because (a) it doesn't - * belong there and (b) in principle people might do two queries in - * parallel using the same adb handle. (b) is in practice a little - * bit academic because at the moment we're seeking all over the disk - * using adb->fd, but changing to use pread() might win us - * threadsafety eventually. - */ -typedef struct adb_qstate_internal { - Accumulator *accumulator; - std::set<std::string> *allowed_keys; - std::priority_queue<PointPair> *exact_evaluation_queue; - LSH *lsh; -} adb_qstate_internal_t; - /* this struct is the in-memory representation of the binary * information stored at the head of each adb file */ typedef struct adbheader { @@ -168,6 +153,22 @@ } } adb_result_triple_lt; +/* this struct is for maintaining per-query state. We don't want to + * store this stuff in the adb struct itself, because (a) it doesn't + * belong there and (b) in principle people might do two queries in + * parallel using the same adb handle. (b) is in practice a little + * bit academic because at the moment we're seeking all over the disk + * using adb->fd, but changing to use pread() might win us + * threadsafety eventually. + */ +typedef struct adb_qstate_internal { + Accumulator *accumulator; + std::set<std::string> *allowed_keys; + std::priority_queue<PointPair> *exact_evaluation_queue; + std::set< adb_result_t, adb_result_triple_lt > *set; + LSH *lsh; +} adb_qstate_internal_t; + /* We could go gcc-specific here and use typeof() instead of passing * in an explicit type. Answers on a postcard as to whether that's a * good plan or not. */
--- a/dbaccumulator.h Fri Aug 21 15:23:32 2009 +0000 +++ b/dbaccumulator.h Fri Aug 28 17:14:06 2009 +0000 @@ -7,32 +7,24 @@ private: unsigned int pointNN; std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *queue; - std::set< adb_result_t, adb_result_triple_lt > *set; }; template <class T> DBAccumulator<T>::DBAccumulator(unsigned int pointNN) - : pointNN(pointNN), queue(0), set(0) { + : pointNN(pointNN), queue(0) { queue = new std::priority_queue< adb_result_t, std::vector<adb_result_t>, T>; - set = new std::set<adb_result_t, adb_result_triple_lt>; } template <class T> DBAccumulator<T>::~DBAccumulator() { if(queue) { delete queue; } - if(set) { - delete set; - } } template <class T> void DBAccumulator<T>::add_point(adb_result_t *r) { if(!isnan(r->dist)) { - if(set->find(*r) == set->end()) { - set->insert(*r); - queue->push(*r); - if(queue->size() > pointNN) { - queue->pop(); - } + queue->push(*r); + if(queue->size() > pointNN) { + queue->pop(); } } }
--- a/nearestaccumulator.h Fri Aug 21 15:23:32 2009 +0000 +++ b/nearestaccumulator.h Fri Aug 28 17:14:06 2009 +0000 @@ -5,20 +5,15 @@ void add_point(adb_result_t *r); adb_query_results_t *get_points(); private: - std::set< adb_result_t, adb_result_triple_lt > *set; std::set< adb_result_t, adb_result_qpos_lt > *points; }; template <class T> NearestAccumulator<T>::NearestAccumulator() - : set(0), points(0) { - set = new std::set< adb_result_t, adb_result_triple_lt >; + : points(0) { points = new std::set< adb_result_t, adb_result_qpos_lt >; } template <class T> NearestAccumulator<T>::~NearestAccumulator() { - if(set) { - delete set; - } if(points) { delete points; } @@ -26,17 +21,13 @@ template <class T> void NearestAccumulator<T>::add_point(adb_result_t *r) { if(!isnan(r->dist)) { - if(set->find(*r) == set->end()) { - set->insert(*r); - - std::set< adb_result_t, adb_result_qpos_lt >::iterator it; - it = points->find(*r); - if(it == points->end()) { - points->insert(*r); - } else if(T()(*(const adb_result_t *)r,(*it))) { - points->erase(it); - points->insert(*r); - } + std::set< adb_result_t, adb_result_qpos_lt >::iterator it; + it = points->find(*r); + if(it == points->end()) { + points->insert(*r); + } else if(T()(*(const adb_result_t *)r,(*it))) { + points->erase(it); + points->insert(*r); } } }
--- a/pertrackaccumulator.h Fri Aug 21 15:23:32 2009 +0000 +++ b/pertrackaccumulator.h Fri Aug 28 17:14:06 2009 +0000 @@ -8,13 +8,11 @@ unsigned int pointNN; unsigned int trackNN; std::map<adb_result_t, std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *, adb_result_key_lt> *queues; - std::set< adb_result_t, adb_result_triple_lt > *set; }; template <class T> PerTrackAccumulator<T>::PerTrackAccumulator(unsigned int pointNN, unsigned int trackNN) - : pointNN(pointNN), trackNN(trackNN), queues(0), set(0) { + : pointNN(pointNN), trackNN(trackNN), queues(0) { queues = new std::map<adb_result_t, std::priority_queue< adb_result_t, std::vector<adb_result_t>, T > *, adb_result_key_lt>; - set = new std::set< adb_result_t, adb_result_triple_lt >; } template <class T> PerTrackAccumulator<T>::~PerTrackAccumulator() { @@ -25,30 +23,23 @@ } delete queues; } - if(set) { - delete set; - } } template <class T> void PerTrackAccumulator<T>::add_point(adb_result_t *r) { if(!isnan(r->dist)) { - if(set->find(*r) == set->end()) { - set->insert(*r); - - typename std::map< adb_result_t, std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *, adb_result_key_lt>::iterator it; - std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *queue; - it = queues->find(*r); - if(it == queues->end()) { - queue = new std::priority_queue< adb_result_t, std::vector< adb_result_t >, T >; - (*queues)[*r] = queue; - } else { - queue = (*it).second; - } - - queue->push(*r); - if(queue->size() > pointNN) { - queue->pop(); - } + typename std::map< adb_result_t, std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *, adb_result_key_lt>::iterator it; + std::priority_queue< adb_result_t, std::vector< adb_result_t >, T > *queue; + it = queues->find(*r); + if(it == queues->end()) { + queue = new std::priority_queue< adb_result_t, std::vector< adb_result_t >, T >; + (*queues)[*r] = queue; + } else { + queue = (*it).second; + } + + queue->push(*r); + if(queue->size() > pointNN) { + queue->pop(); } } }
--- a/query-indexed.cpp Fri Aug 21 15:23:32 2009 +0000 +++ b/query-indexed.cpp Fri Aug 28 17:14:06 2009 +0000 @@ -54,7 +54,10 @@ r.dist = dist; r.qpos = qpos; r.ipos = spos; - qstate->accumulator->add_point(&r); + if(qstate->set->find(r) == qstate->set->end()) { + qstate->set->insert(r); + qstate->accumulator->add_point(&r); + } } } @@ -95,6 +98,8 @@ bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD; double absolute_threshold = spec->refine.absolute_threshold; + qstate->set = new std::set< adb_result_t, adb_result_triple_lt >; + if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) { add_point_func = &audiodb_index_add_point_approximate; } else { @@ -163,6 +168,9 @@ if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) { audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers); } + + delete qstate->set; + // Clean up if(query_data)
--- a/query.cpp Fri Aug 21 15:23:32 2009 +0000 +++ b/query.cpp Fri Aug 28 17:14:06 2009 +0000 @@ -477,7 +477,10 @@ r.dist = dist; r.qpos = pp.qpos; r.ipos = pp.spos; - qstate->accumulator->add_point(&r); + if(qstate->set->find(r) == qstate->set->end()) { + qstate->set->insert(r); + qstate->accumulator->add_point(&r); + } } } qstate->exact_evaluation_queue->pop();