Mercurial > hg > audiodb
diff audioDB.h @ 498:342822c2d49a
Merge api-inversion branch (-r656:771, but I don't expect to return to
that branch) into the trunk.
I expect there to be minor performance regressions (e.g. in the SOAP
server index caching, which I have forcibly removed) and minor
unplugged memory leaks (e.g. in audioDB::query(), where I don't free up
the datum). I hope that these leaks and performance regressions can be
plugged in short order. I also expect that some (but maybe not all) of
the issues currently addressed in the memory-leaks branch are superseded
or fixed by this merge.
There remains much work to be done; go forth and do it.
author | mas01cr |
---|---|
date | Sat, 10 Jan 2009 16:47:57 +0000 |
parents | f9d86b1db21c |
children | da4b76190d43 |
line wrap: on
line diff
--- a/audioDB.h Sat Jan 10 11:11:27 2009 +0000 +++ b/audioDB.h Sat Jan 10 16:47:57 2009 +0000 @@ -11,6 +11,7 @@ #include <iostream> #include <fstream> #include <set> +#include <map> #include <string> #include <math.h> #include <sys/time.h> @@ -20,13 +21,27 @@ #include <gsl/gsl_rng.h> // includes for LSH indexing +extern "C" { +#include "audioDB_API.h" +} #include "ReporterBase.h" +#include "accumulator.h" #include "lshlib.h" // includes for web services #include "soapH.h" #include "cmdline.h" +// should probably be rewritten +class PointPair{ + public: + Uns32T trackID; + Uns32T qpos; + Uns32T spos; + PointPair(Uns32T a, Uns32T b, Uns32T c); +}; +bool operator<(const PointPair& a, const PointPair& b); + #define MAXSTR 512 // Databse PRIMARY commands @@ -64,11 +79,6 @@ #define COM_LSH_EXACT "--lsh_exact" #define COM_NO_UNIT_NORMING "--no_unit_norming" -// Because LSH returns NN with P(1)<1 we want to return exact -// points above this boundary. -// Because we work in Radius^2 units, -// The sqrt of this number is the multiplier on the radius - #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24) #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24) #define O2_FORMAT_VERSION (4U) @@ -182,7 +192,6 @@ #define SAFE_DELETE(PTR) delete PTR; PTR=0; #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0; -extern LSH* SERVER_LSH_INDEX_SINGLETON; extern char* SERVER_ADB_ROOT; extern char* SERVER_ADB_FEATURE_ROOT; @@ -203,28 +212,6 @@ off_t dbSize; } dbTableHeaderT, *dbTableHeaderPtr; -typedef struct { - - unsigned numFiles; - unsigned dim; - unsigned length; - unsigned dudCount; - unsigned nullCount; - unsigned flags; - - -} cppstatus, *cppstatusptr; - -class PointPair{ - public: - Uns32T trackID; - Uns32T qpos; - Uns32T spos; - PointPair(Uns32T a, Uns32T b, Uns32T c); -}; - -bool operator<(const PointPair& a, const PointPair& b); - class audioDB{ private: gengetopt_args_info args_info; @@ -249,18 +236,14 @@ int lshfid; bool forWrite; int infid; - char* db; - char* indata; struct stat 
statbuf; dbTableHeaderPtr dbH; + struct adb *adb; gsl_rng *rng; char* fileTable; unsigned* trackTable; - off_t* trackOffsetTable; - double* dataBuf; - double* inBuf; double* l2normTable; double* timesTable; double* powerTable; @@ -271,7 +254,6 @@ size_t fileTableLength; size_t trackTableLength; - off_t dataBufLength; size_t timesTableLength; size_t powerTableLength; size_t l2normTableLength; @@ -310,47 +292,21 @@ double relative_threshold; ReporterBase* reporter; // track/point reporter - priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >* exact_evaluation_queue; set<Uns32T> * allowed_keys; // search restrict list by key - // Timers - struct timeval tv1; - struct timeval tv2; - // LISZT parameters unsigned lisztOffset; unsigned lisztLength; - //for lib / API - int apierrortemp; - unsigned UseApiError; - // private methods void error(const char* a, const char* b = "", const char *sysFunc = 0); - void sequence_sum(double *buffer, int length, int seqlen); - void sequence_sqrt(double *buffer, int length, int seqlen); - void sequence_average(double *buffer, int length, int seqlen); - - void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD); - void delete_arrays(int track, unsigned int numVectors, double **D, double **DD); - void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p); - void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp); - void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex); - void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp); - void query_loop(const char* dbName, Uns32T queryIndex); - void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors); - 
double dot_product_points(double* q, double* p, Uns32T L); + void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void initRNG(); void initDBHeader(const char *dbName); - void initInputFile(const char *inFile, bool loadData = true); + void initInputFile(const char *inFile); void initTables(const char* dbName, const char* inFile = 0); void initTablesFromKey(const char* dbName, const Uns32T queryIndex); - void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); - void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); - void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); - void insertPowerData(unsigned n, int powerfd, double *powerdata); - unsigned getKeyPos(char* key); void prefix_name(char** const name, const char* prefix); public: @@ -358,10 +314,6 @@ audioDB(const unsigned argc, const char *argv[], adb__queryResponse *adbQueryResponse); audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse); audioDB(const unsigned argc, const char *argv[], adb__lisztResponse *adbLisztResponse); - audioDB(const unsigned argc, const char *argv[], int * apierror); - audioDB(const unsigned argc, const char *argv[], cppstatusptr stat, int * apierror); - audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror); - void cleanup(); ~audioDB(); @@ -369,21 +321,15 @@ void get_lock(int fd, bool exclusive); void release_lock(int fd); void create(const char* dbName); - bool enough_per_file_space_free(); - bool enough_data_space_free(off_t size); - void insert_data_vectors(off_t offset, void *buffer, size_t size); void insert(const char* dbName, const char* inFile); void batchinsert(const char* dbName, const char* inFile); - void batchinsert_large_adb(const char* dbName, const char* inFile); void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); void status(const char* dbName, 
adb__statusResponse *adbStatusResponse=0); - void status(const char* dbName, cppstatusptr status); unsigned random_track(unsigned *propTable, unsigned total); void sample(const char *dbName); void l2norm(const char* dbName); void power_flag(const char *dbName); - bool powers_acceptable(double p1, double p2); void dump(const char* dbName); void liszt(const char* dbName, unsigned offset, unsigned numLines, adb__lisztResponse* adbLisztResponse=0); @@ -399,12 +345,6 @@ Uns32T lsh_param_N; // Number of rows per hash table Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row - Uns32T lsh_n_point_bits; // How many bits to use to encode point ID within a track - - - // LSH vector<> containers for one in-core copy of a set of feature vectors - vector<float>::iterator vi; // feature vector iterator - vector<vector<float> > *vv; // one-track's worth data // LSH indexing and retrieval methods void index_index_db(const char* dbName); @@ -412,20 +352,7 @@ void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp); Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp); - void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen); - int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp); - int index_query_loop(const char* dbName, Uns32T queryIndex); - vector<vector<float> >* index_initialize_shingles(Uns32T sz); - int index_init_query(const char* dbName); - int index_exists(const char* dbName, double radius, Uns32T sequenceLength); - char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength); - static void index_add_point(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter 
callback method - static Uns32T index_to_trackID(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackID - static Uns32T index_to_trackPos(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackPos (spos) - static Uns32T index_from_trackInfo(Uns32T trackID, Uns32T pntID, Uns32T nPntBits); // Convert audioDB trackID and trackPos to an lsh point index - void initialize_exact_evalutation_queue(); - void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos); - LSH* index_allocate(char* indexName, bool load_hashTables); + void insertPowerData(unsigned n, int powerfd, double *powerdata); void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); void initialize_allowed_keys(std::ifstream*); // implementation of restrict lists using STL "set" class int is_in_allowed_keys(Uns32T trackID); // test method for allowed_keys used during search @@ -460,14 +387,11 @@ lshfid(0), \ forWrite(false), \ infid(0), \ - db(0), \ - indata(0), \ dbH(0), \ + adb(0), \ rng(0), \ fileTable(0), \ trackTable(0), \ - trackOffsetTable(0), \ - dataBuf(0), \ l2normTable(0), \ timesTable(0), \ powerTable(0), \ @@ -476,7 +400,6 @@ powerFileNameTable(0), \ fileTableLength(0), \ trackTableLength(0), \ - dataBufLength(0), \ timesTableLength(0), \ powerTableLength(0), \ l2normTableLength(0), \ @@ -502,18 +425,15 @@ timesTol(0.1), \ radius(0), \ query_from_key(false), \ - query_from_key_index(O2_ERR_KEYNOTFOUND), \ + query_from_key_index((uint32_t) -1), \ use_absolute_threshold(false), \ absolute_threshold(0.0), \ use_relative_threshold(false), \ relative_threshold(0.0), \ reporter(0), \ - exact_evaluation_queue(0), \ allowed_keys(0), \ lisztOffset(0), \ lisztLength(0), \ - apierrortemp(0), \ - UseApiError(0), \ lsh(0), \ lsh_in_core(false), \ lsh_use_u_functions(false), \ @@ -523,7 +443,5 @@ lsh_param_m(0), \ lsh_param_N(0), \ lsh_param_b(0), \ - lsh_param_ncols(0), \ - 
lsh_n_point_bits(0), \ - vv(0) + lsh_param_ncols(0) #endif