mas01cr@509: extern "C" { mas01cr@509: #include "audioDB_API.h" mas01cr@509: } mas01cr@509: #include "audioDB-internals.h" mas01cr@589: #include "lshlib.h" mas01cr@509: mas01cr@509: /* mas01cr@509: * Routines and datastructures which are specific to indexed queries. mas01cr@509: */ mas01cr@509: typedef struct adb_qcallback { mas01cr@509: adb_t *adb; mas01cr@509: adb_qstate_internal_t *qstate; mas01cr@509: } adb_qcallback_t; mas01cr@509: mas01cr@509: // return true if indexed query performed else return false mas01cr@509: int audiodb_index_init_query(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) { mas01cr@509: mas01cr@509: uint32_t sequence_length = spec->qid.sequence_length; mas01cr@509: double radius = spec->refine.radius; mas01cr@509: if(!(audiodb_index_exists(adb->path, radius, sequence_length))) mas01cr@509: return false; mas01cr@509: mas01cr@509: char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length); mas01cr@509: if(!indexName) { mas01cr@509: return false; mas01cr@509: } mas01cr@509: mas01cr@509: qstate->lsh = audiodb_index_allocate(adb, indexName, corep); mas01cr@672: qstate->qkey = spec->qid.datum->key; mas01cr@509: mas01cr@509: /* FIXME: it would be nice if the LSH library didn't make me do mas01cr@509: * this. */ mas01cr@509: if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) { mas01cr@509: delete qstate->lsh; mas01cr@509: qstate->lsh = audiodb_index_allocate(adb, indexName, true); mas01mc@513: #ifdef LSH_DUMP_CORE_TABLES mas01mc@513: qstate->lsh->dump_hashtables(); mas01mc@513: #endif mas01cr@509: } mas01cr@509: mas01cr@509: delete[] indexName; mas01cr@509: return true; mas01cr@509: } mas01cr@509: mas01cr@589: void audiodb_index_add_point_approximate(void *user_data, uint32_t pointID, uint32_t qpos, float dist) { mas01cr@509: adb_qcallback_t *data = (adb_qcallback_t *) user_data; mas01cr@509: adb_t *adb = data->adb; mas01cr@509: adb_qstate_internal_t *qstate = data->qstate; mas01mc@534: uint32_t trackID = audiodb_index_to_track_id(adb, pointID); mas01mc@534: uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID); mas01cr@509: std::set::iterator keys_end = qstate->allowed_keys->end(); mas01cr@509: if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { mas01cr@509: adb_result_t r; mas01cr@672: r.ikey = (*adb->keys)[trackID].c_str(); mas01cr@672: r.qkey = qstate->qkey; mas01cr@509: r.dist = dist; mas01cr@509: r.qpos = qpos; mas01cr@509: r.ipos = spos; mas01cr@610: if(qstate->set->find(r) == qstate->set->end()) { mas01cr@610: qstate->set->insert(r); mas01cr@610: qstate->accumulator->add_point(&r); mas01cr@610: } mas01cr@509: } mas01cr@509: } mas01cr@509: mas01cr@509: // Maintain a queue of points to pass to audiodb_query_queue_loop() mas01cr@509: // for exact evaluation mas01cr@589: void audiodb_index_add_point_exact(void *user_data, uint32_t pointID, uint32_t qpos, float dist) { mas01cr@509: adb_qcallback_t *data = (adb_qcallback_t *) user_data; mas01cr@509: adb_t *adb = data->adb; mas01cr@509: adb_qstate_internal_t *qstate = data->qstate; mas01mc@534: uint32_t trackID = audiodb_index_to_track_id(adb, pointID); mas01mc@534: uint32_t spos = audiodb_index_to_track_pos(adb, trackID, pointID); mas01cr@509: std::set::iterator keys_end = qstate->allowed_keys->end(); mas01cr@509: if(qstate->allowed_keys->find((*adb->keys)[trackID]) != keys_end) { mas01cr@509: PointPair p(trackID, qpos, spos); mas01cr@509: qstate->exact_evaluation_queue->push(p); mas01cr@509: } mas01cr@509: } mas01cr@509: mas01cr@509: // return -1 on error mas01cr@509: // return 0: if index does not exist mas01cr@509: // return nqv: if index exists mas01cr@509: int audiodb_index_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { mas01mc@534: if(adb->header->flags>>28) mas01mc@534: cerr << "WARNING: Database created using deprecated LSH_N_POINT_BITS coding: REBUILD INDEXES..." << endl; mas01mc@534: mas01cr@509: double *query = 0, *query_data = 0; mas01cr@509: adb_qpointers_internal_t qpointers = {0}; mas01cr@509: mas01cr@509: adb_qcallback_t callback_data; mas01cr@509: callback_data.adb = adb; mas01cr@509: callback_data.qstate = qstate; mas01cr@509: mas01cr@509: void (*add_point_func)(void *, uint32_t, uint32_t, float); mas01cr@509: mas01cr@509: uint32_t sequence_length = spec->qid.sequence_length; mas01cr@509: bool normalized = (spec->params.distance == ADB_DISTANCE_EUCLIDEAN_NORMED); mas01cr@509: double radius = spec->refine.radius; mas01cr@509: bool use_absolute_threshold = spec->refine.flags & ADB_REFINE_ABSOLUTE_THRESHOLD; mas01cr@509: double absolute_threshold = spec->refine.absolute_threshold; mas01cr@509: mas01cr@610: qstate->set = new std::set< adb_result_t, adb_result_triple_lt >; mas01cr@610: mas01cr@509: if(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES) { mas01cr@509: add_point_func = &audiodb_index_add_point_approximate; mas01cr@509: } else { mas01cr@509: qstate->exact_evaluation_queue = new std::priority_queue; mas01cr@509: add_point_func = &audiodb_index_add_point_exact; mas01cr@509: } mas01cr@509: mas01cr@509: /* FIXME: this hardwired lsh_in_core is here to allow for a mas01cr@509: * transition period while the need for the argument is worked mas01cr@509: * through. Hopefully it will disappear again eventually. */ mas01cr@509: bool lsh_in_core = true; mas01cr@509: mas01cr@509: if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) { mas01cr@509: return 0; mas01cr@509: } mas01cr@509: mas01cr@509: char *database = audiodb_index_get_name(adb->path, radius, sequence_length); mas01cr@509: if(!database) { mas01cr@509: return -1; mas01cr@509: } mas01cr@509: mas01cr@509: if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { mas01cr@509: delete [] database; mas01cr@509: return -1; mas01cr@509: } mas01cr@509: mas01mc@534: uint32_t Nq = qpointers.nvectors - sequence_length + 1; mas01cr@509: std::vector > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequence_length); mas01cr@509: mas01cr@509: // Construct shingles from query features mas01cr@509: for(uint32_t pointID = 0; pointID < Nq; pointID++) { mas01cr@509: audiodb_index_make_shingle(vv, pointID, query, adb->header->dim, sequence_length); mas01cr@509: } mas01cr@509: mas01cr@509: // Normalize query vectors mas01cr@509: int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, adb->header->dim, sequence_length, radius, normalized, use_absolute_threshold, absolute_threshold); mas01cr@509: if(vcount == -1) { mas01cr@509: audiodb_index_delete_shingles(vv); mas01cr@509: delete [] database; mas01cr@509: return -1; mas01cr@509: } mas01cr@509: uint32_t numVecsAboveThreshold = vcount; mas01cr@509: mas01cr@509: // Nq contains number of inspected points in query file, mas01cr@509: // numVecsAboveThreshold is number of points with power >= absolute_threshold mas01cr@509: double *qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation mas01cr@509: if(!(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) && numVecsAboveThreshold) { mas01cr@509: if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) { mas01cr@509: qstate->lsh->retrieve_point((*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data); mas01cr@509: } else { mas01cr@509: qstate->lsh->serial_retrieve_point(database, (*vv)[0], spec->qid.sequence_start, add_point_func, &callback_data); mas01cr@509: } mas01cr@509: } else if(numVecsAboveThreshold) { mas01cr@509: for(uint32_t pointID = 0; pointID < Nq; pointID++) { mas01cr@509: if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) { mas01cr@509: if((qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2) || lsh_in_core) { mas01cr@509: qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data); mas01cr@509: } else { mas01cr@509: qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data); mas01cr@509: } mas01cr@509: } mas01cr@509: } mas01cr@509: } mas01cr@509: audiodb_index_delete_shingles(vv); mas01cr@509: mas01cr@509: if(!(spec->qid.flags & ADB_QID_FLAG_ALLOW_FALSE_POSITIVES)) { mas01cr@509: audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers); mas01cr@509: } mas01cr@610: mas01cr@610: delete qstate->set; mas01cr@610: mas01cr@509: mas01cr@509: // Clean up mas01cr@509: if(query_data) mas01cr@509: delete[] query_data; mas01cr@509: if(qpointers.l2norm_data) mas01cr@509: delete[] qpointers.l2norm_data; mas01cr@509: if(qpointers.power_data) mas01cr@509: delete[] qpointers.power_data; mas01cr@509: if(qpointers.mean_duration) mas01cr@509: delete[] qpointers.mean_duration; mas01cr@509: if(database) mas01cr@509: delete[] database; mas01cr@509: if(qstate->lsh != adb->cached_lsh) mas01cr@509: delete qstate->lsh; mas01cr@509: mas01cr@509: return Nq; mas01cr@509: }