Mercurial > hg > audiodb
changeset 431:8632cd387e24 api-inversion
Punishment gluttony.
Continue teasing out vague orthogonalities by beginning the task of
using an adb_query_parameters_t. This does have the benefit of making
the distance calculation clearer, and we begin to see the shape of a
putative audiodb_query() emerging from the shrapnel of audioDB::query.
(Only the general shape; the detail is still a long, long way away).
author | mas01cr |
---|---|
date | Wed, 24 Dec 2008 10:55:40 +0000 |
parents | 2d14d21f826b |
children | 62a0515f59be |
files | audioDB.h index.cpp query.cpp |
diffstat | 3 files changed, 127 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.h Wed Dec 24 10:55:36 2008 +0000 +++ b/audioDB.h Wed Dec 24 10:55:40 2008 +0000 @@ -326,8 +326,8 @@ void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp); void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex); void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp); - void query_loop(adb_query_refine_t *refine, Uns32T queryIndex); - void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_refine_t *refine); + void query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, Uns32T queryIndex); + void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_parameters_t *params, adb_query_refine_t *refine); void initRNG(); void initDBHeader(const char *dbName); void initInputFile(const char *inFile, bool loadData = true); @@ -388,7 +388,7 @@ Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp); void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen); int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp); - int index_query_loop(adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex); + int index_query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex); vector<vector<float> >* index_initialize_shingles(Uns32T sz); int index_init_query(const char* dbName); int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
--- a/index.cpp Wed Dec 24 10:55:36 2008 +0000 +++ b/index.cpp Wed Dec 24 10:55:40 2008 +0000 @@ -575,7 +575,7 @@ // return 0: if index does not exist // return nqv: if index exists -int audioDB::index_query_loop(adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex) { +int audioDB::index_query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex) { unsigned int numVectors = 0; double *query = 0, *query_data = 0; @@ -583,6 +583,8 @@ double meanQdur = 0; void (*add_point_func)(void*,Uns32T,Uns32T,float); + normalizedDistance = (params->distance == ADB_DISTANCE_EUCLIDEAN_NORMED); + // Set the point-reporter callback based on the value of lsh_exact if(lsh_exact){ initialize_exact_evalutation_queue(); @@ -603,9 +605,6 @@ VERB_LOG(1, "retrieving tracks..."); - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - gettimeofday(&tv1, NULL); // query vector index Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1; @@ -640,7 +639,7 @@ if(lsh_exact) // Perform exact distance computation on point pairs in exact_evaluation_queue - query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors, refine); + query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors, params, refine); gettimeofday(&tv2,NULL); VERB_LOG(1,"elapsed time: %ld msec\n",
--- a/query.cpp Wed Dec 24 10:55:36 2008 +0000 +++ b/query.cpp Wed Dec 24 10:55:40 2008 +0000 @@ -20,7 +20,14 @@ void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { + // init database tables and dbH first + if(query_from_key) + initTables(dbName); + else + initTables(dbName, inFile); + adb_query_refine_t refine; + adb_query_parameters_t params; refine.flags = 0; /* FIXME: trackFile / ADB_REFINE_KEYLIST */ if(radius) { @@ -45,83 +52,114 @@ refine.hopsize = sequenceHop; } - // init database tables and dbH first - if(query_from_key) - initTables(dbName); - else - initTables(dbName, inFile); + switch(queryType) { + case O2_POINT_QUERY: + sequenceLength = 1; + params.accumulation = ADB_ACCUMULATION_DB; + params.distance = ADB_DISTANCE_DOT_PRODUCT; + params.npoints = pointNN; + params.ntracks = 0; + reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); + break; + case O2_TRACK_QUERY: + sequenceLength = 1; + params.accumulation = ADB_ACCUMULATION_PER_TRACK; + params.distance = ADB_DISTANCE_DOT_PRODUCT; + params.npoints = pointNN; + params.ntracks = trackNN; + reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); + break; + case O2_SEQUENCE_QUERY: + case O2_N_SEQUENCE_QUERY: + params.accumulation = ADB_ACCUMULATION_PER_TRACK; + params.distance = no_unit_norming ? 
ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; + params.npoints = pointNN; + params.ntracks = trackNN; + switch(queryType) { + case O2_SEQUENCE_QUERY: + if(!(refine.flags & ADB_REFINE_RADIUS)) { + reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); + } else if (index_exists(dbName, radius, sequenceLength)) { + char* indexName = index_get_name(dbName, radius, sequenceLength); + lsh = index_allocate(indexName, false); + reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); + delete[] indexName; + } else { + reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); + } + break; + case O2_N_SEQUENCE_QUERY: + if(!(refine.flags & ADB_REFINE_RADIUS)) { + reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); + } else if (index_exists(dbName, radius, sequenceLength)){ + char* indexName = index_get_name(dbName, radius, sequenceLength); + lsh = index_allocate(indexName, false); + reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); + delete[] indexName; + } else { + reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); + } + break; + } + break; + case O2_ONE_TO_ONE_N_SEQUENCE_QUERY: + params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; + params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; + params.npoints = 0; + params.ntracks = 0; + break; + default: + error("unrecognized queryType"); + } // keyKeyPos requires dbH to be initialized if(query_from_key && (!key || (query_from_key_index = audiodb_key_index(adb, key)) == (uint32_t) -1)) error("Query key not found", key); - - switch (queryType) { - case O2_POINT_QUERY: - sequenceLength = 1; - normalizedDistance = false; - reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); - accumulator = new DBAccumulator<adb_result_dist_gt>(pointNN); - break; - case 
O2_TRACK_QUERY: - sequenceLength = 1; - normalizedDistance = false; - reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); - accumulator = new PerTrackAccumulator<adb_result_dist_gt>(pointNN, trackNN); - break; - case O2_SEQUENCE_QUERY: - if(no_unit_norming) - normalizedDistance = false; - accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN); - if(!(refine.flags & ADB_REFINE_RADIUS)) { - reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); - } else { - if(index_exists(dbName, radius, sequenceLength)){ - char* indexName = index_get_name(dbName, radius, sequenceLength); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); - delete[] indexName; - } - else - reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); + + switch(params.distance) { + case ADB_DISTANCE_DOT_PRODUCT: + switch(params.accumulation) { + case ADB_ACCUMULATION_DB: + accumulator = new DBAccumulator<adb_result_dist_gt>(params.npoints); + break; + case ADB_ACCUMULATION_PER_TRACK: + accumulator = new PerTrackAccumulator<adb_result_dist_gt>(params.npoints, params.ntracks); + break; + case ADB_ACCUMULATION_ONE_TO_ONE: + accumulator = new NearestAccumulator<adb_result_dist_gt>(); + break; + default: + error("unknown accumulation"); } break; - case O2_N_SEQUENCE_QUERY: - if(no_unit_norming) - normalizedDistance = false; - accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN); - if(!(refine.flags & ADB_REFINE_RADIUS)) { - reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); - } else { - if(index_exists(dbName, radius, sequenceLength)){ - char* indexName = index_get_name(dbName, radius, sequenceLength); - lsh = index_allocate(indexName, false); - reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, 
index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); - delete[] indexName; - } - else - reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); - } - break; - case O2_ONE_TO_ONE_N_SEQUENCE_QUERY : - accumulator = new NearestAccumulator<adb_result_dist_lt>(); - if(!(refine.flags & ADB_REFINE_RADIUS)) { - error("query-type not yet supported"); - } else { - reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); + case ADB_DISTANCE_EUCLIDEAN_NORMED: + case ADB_DISTANCE_EUCLIDEAN: + switch(params.accumulation) { + case ADB_ACCUMULATION_DB: + accumulator = new DBAccumulator<adb_result_dist_lt>(params.npoints); + break; + case ADB_ACCUMULATION_PER_TRACK: + accumulator = new PerTrackAccumulator<adb_result_dist_lt>(params.npoints, params.ntracks); + break; + case ADB_ACCUMULATION_ONE_TO_ONE: + accumulator = new NearestAccumulator<adb_result_dist_lt>(); + break; + default: + error("unknown accumulation"); } break; default: - error("unrecognized queryType in query()"); - } - + error("unknown distance function"); + } + // Test for index (again) here if((refine.flags & ADB_REFINE_RADIUS) && index_exists(dbName, radius, sequenceLength)){ VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength); - index_query_loop(&refine, dbName, query_from_key_index); + index_query_loop(&params, &refine, dbName, query_from_key_index); } else{ VERB_LOG(1, "Calling brute-force query on database %s\n", dbName); - query_loop(&refine, query_from_key_index); + query_loop(&params, &refine, query_from_key_index); } adb_query_results_t *rs = accumulator->get_points(); @@ -515,7 +553,7 @@ // Postconditions: // reporter contains the points and distances that meet the reporter constraints -void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_refine_t *refine){ +void audioDB::query_loop_points(double* query, double* qnPtr, 
double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_parameters_t *params, adb_query_refine_t *refine){ unsigned int dbVectors; double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; double *meanDBdur = 0; @@ -533,9 +571,6 @@ VERB_LOG(1, "matching points..."); - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - // We are guaranteed that the order of points is sorted by: // trackID, spos, qpos // so we can be relatively efficient in initialization of track data. @@ -597,13 +632,14 @@ dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength); double qn = qnPtr[qPos]; double sn = sNorm[sPos]; - if(normalizedDistance) + switch(params->distance) { + case ADB_DISTANCE_EUCLIDEAN_NORMED: dist = 2 - (2/(qn*sn))*dist; - else - if(no_unit_norming) - dist = qn*qn + sn*sn - 2*dist; - // else - // dist = dist; + break; + case ADB_DISTANCE_EUCLIDEAN: + dist = qn*qn + sn*sn - 2*dist; + break; + } if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) { adb_result_t r; r.key = fileTable + pp.trackID * O2_FILETABLE_ENTRY_SIZE; @@ -621,7 +657,7 @@ SAFE_DELETE_ARRAY(meanDBdur); } -void audioDB::query_loop(adb_query_refine_t *refine, Uns32T queryIndex) { +void audioDB::query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, Uns32T queryIndex) { unsigned int numVectors; double *query, *query_data; @@ -644,9 +680,6 @@ VERB_LOG(1, "matching tracks..."); - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; double **D = 0; // Differences query and target double **DD = 0; // Matched filter distance @@ -716,15 +749,18 @@ // Search for minimum distance by shingles (concatenated vectors) for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { - double thisDist; - if(normalizedDistance) + double thisDist = 0; + 
switch(params->distance) { + case ADB_DISTANCE_EUCLIDEAN_NORMED: thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; - else - if(no_unit_norming) - thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; - else - thisDist = DD[j][k]; - + break; + case ADB_DISTANCE_EUCLIDEAN: + thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; + break; + case ADB_DISTANCE_DOT_PRODUCT: + thisDist = DD[j][k]; + break; + } // Power test if ((!usingPower) || audiodb_powers_acceptable(refine, qpPtr[j], sPower[trackIndexOffset + k])) { // radius test