diff audioDB.h @ 498:342822c2d49a

Merge api-inversion branch (-r656:771, but I don't expect to return to that branch) into the trunk. I expect there to be minor performance regressions (e.g. in the SOAP server index caching, which I have forcibly removed) and minor unplugged memory leaks (e.g. in audioDB::query(), where I don't free up the datum). I hope that these leaks and performance regressions can be plugged in short order. I also expect that some (but maybe not all) of the issues currently addressed in the memory-leaks branch are superseded or fixed by this merge. There remains much work to be done; go forth and do it.
author mas01cr
date Sat, 10 Jan 2009 16:47:57 +0000
parents f9d86b1db21c
children da4b76190d43
line wrap: on
line diff
--- a/audioDB.h	Sat Jan 10 11:11:27 2009 +0000
+++ b/audioDB.h	Sat Jan 10 16:47:57 2009 +0000
@@ -11,6 +11,7 @@
 #include <iostream>
 #include <fstream>
 #include <set>
+#include <map>
 #include <string>
 #include <math.h>
 #include <sys/time.h>
@@ -20,13 +21,27 @@
 #include <gsl/gsl_rng.h>
 
 // includes for LSH indexing
+extern "C" {
+#include "audioDB_API.h"
+}
 #include "ReporterBase.h"
+#include "accumulator.h"
 #include "lshlib.h"
 
 // includes for web services
 #include "soapH.h"
 #include "cmdline.h"
 
+// should probably be rewritten
+class PointPair{
+ public:
+  Uns32T trackID;
+  Uns32T qpos;
+  Uns32T spos;
+  PointPair(Uns32T a, Uns32T b, Uns32T c);
+};
+bool operator<(const PointPair& a, const PointPair& b);
+
 #define MAXSTR 512
 
 // Databse PRIMARY commands
@@ -64,11 +79,6 @@
 #define COM_LSH_EXACT "--lsh_exact"
 #define COM_NO_UNIT_NORMING "--no_unit_norming"
 
-// Because LSH returns NN with P(1)<1 we want to return exact
-// points above this boundary. 
-// Because we work in Radius^2 units,
-// The sqrt of this number is the multiplier on the radius
-
 #define O2_OLD_MAGIC ('O'|'2'<<8|'D'<<16|'B'<<24)
 #define O2_MAGIC ('o'|'2'<<8|'d'<<16|'b'<<24)
 #define O2_FORMAT_VERSION (4U)
@@ -182,7 +192,6 @@
 #define SAFE_DELETE(PTR) delete PTR; PTR=0;
 #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0;
 
-extern LSH* SERVER_LSH_INDEX_SINGLETON;
 extern char* SERVER_ADB_ROOT;
 extern char* SERVER_ADB_FEATURE_ROOT;
 
@@ -203,28 +212,6 @@
   off_t dbSize;
 } dbTableHeaderT, *dbTableHeaderPtr;
 
-typedef struct {
-
-    unsigned numFiles;
-    unsigned dim;
-    unsigned length;
-    unsigned dudCount;
-    unsigned nullCount;
-    unsigned flags;
-
-
-} cppstatus, *cppstatusptr;
-
-class PointPair{
- public:
-  Uns32T trackID;
-  Uns32T qpos;
-  Uns32T spos;
-  PointPair(Uns32T a, Uns32T b, Uns32T c);
-};
-
-bool operator<(const PointPair& a, const PointPair& b);
-
 class audioDB{  
  private:
   gengetopt_args_info args_info;
@@ -249,18 +236,14 @@
   int lshfid;
   bool forWrite;
   int infid;
-  char* db;
-  char* indata;
   struct stat statbuf;  
   dbTableHeaderPtr dbH;
+  struct adb *adb;
 
   gsl_rng *rng;
   
   char* fileTable;
   unsigned* trackTable;
-  off_t* trackOffsetTable;
-  double* dataBuf;
-  double* inBuf;
   double* l2normTable;
   double* timesTable;
   double* powerTable;
@@ -271,7 +254,6 @@
 
   size_t fileTableLength;
   size_t trackTableLength;
-  off_t dataBufLength;
   size_t timesTableLength;
   size_t powerTableLength;
   size_t l2normTableLength;
@@ -310,47 +292,21 @@
   double relative_threshold;
   
   ReporterBase* reporter;  // track/point reporter
-  priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >* exact_evaluation_queue;
   set<Uns32T> * allowed_keys;    // search restrict list by key
 
-  // Timers
-  struct timeval tv1;
-  struct timeval tv2;
-
   // LISZT parameters
   unsigned lisztOffset;
   unsigned lisztLength;
 
-  //for lib / API
-  int apierrortemp;
-  unsigned UseApiError;
-
   // private methods
   void error(const char* a, const char* b = "", const char *sysFunc = 0);
-  void sequence_sum(double *buffer, int length, int seqlen);
-  void sequence_sqrt(double *buffer, int length, int seqlen);
-  void sequence_average(double *buffer, int length, int seqlen);
 
-
-  void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD);
-  void delete_arrays(int track, unsigned int numVectors, double **D, double **DD);
-  void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p);
-  void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
-  void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex);
-  void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
-  void query_loop(const char* dbName, Uns32T queryIndex);
-  void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors);
-  double dot_product_points(double* q, double* p, Uns32T  L);
+  void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
   void initRNG();
   void initDBHeader(const char *dbName);
-  void initInputFile(const char *inFile, bool loadData = true);
+  void initInputFile(const char *inFile);
   void initTables(const char* dbName, const char* inFile = 0);
   void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
-  void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
-  void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
-  void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
-  void insertPowerData(unsigned n, int powerfd, double *powerdata);
-  unsigned getKeyPos(char* key);
   void prefix_name(char** const name, const char* prefix);
 
  public:
@@ -358,10 +314,6 @@
   audioDB(const unsigned argc, const char *argv[], adb__queryResponse *adbQueryResponse);
   audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
   audioDB(const unsigned argc, const char *argv[], adb__lisztResponse *adbLisztResponse);
-  audioDB(const unsigned argc, const char *argv[], int * apierror);
-  audioDB(const unsigned argc, const char *argv[], cppstatusptr stat, int * apierror);
-  audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror);
-
 
   void cleanup();
   ~audioDB();
@@ -369,21 +321,15 @@
   void get_lock(int fd, bool exclusive);
   void release_lock(int fd);
   void create(const char* dbName);
-  bool enough_per_file_space_free();
-  bool enough_data_space_free(off_t size);
-  void insert_data_vectors(off_t offset, void *buffer, size_t size);
   void insert(const char* dbName, const char* inFile);
   void batchinsert(const char* dbName, const char* inFile);
-  void batchinsert_large_adb(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
-  void status(const char* dbName, cppstatusptr status);
 
   unsigned random_track(unsigned *propTable, unsigned total);
   void sample(const char *dbName);
   void l2norm(const char* dbName);
   void power_flag(const char *dbName);
-  bool powers_acceptable(double p1, double p2);
   void dump(const char* dbName);
   void liszt(const char* dbName, unsigned offset, unsigned numLines, adb__lisztResponse* adbLisztResponse=0);
 
@@ -399,12 +345,6 @@
   Uns32T lsh_param_N; // Number of rows per hash table
   Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
   Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row
-  Uns32T lsh_n_point_bits; // How many bits to use to encode point ID within a track
-
-
-  // LSH vector<> containers for one in-core copy of a set of feature vectors
-  vector<float>::iterator vi; // feature vector iterator
-  vector<vector<float> > *vv;  // one-track's worth data
 
   // LSH indexing and retrieval methods  
   void index_index_db(const char* dbName);
@@ -412,20 +352,7 @@
   void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
   int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
   Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
-  void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen);
-  int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp);
-  int index_query_loop(const char* dbName, Uns32T queryIndex);
-  vector<vector<float> >* index_initialize_shingles(Uns32T sz);
-  int index_init_query(const char* dbName);
-  int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
-  char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength);
-  static void index_add_point(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method
-  static Uns32T index_to_trackID(Uns32T lshID, Uns32T nPntBits);  // Convert lsh point index to audioDB trackID
-  static Uns32T index_to_trackPos(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackPos (spos)
-  static Uns32T index_from_trackInfo(Uns32T trackID, Uns32T pntID, Uns32T nPntBits); // Convert audioDB trackID and trackPos to an lsh point index
-  void initialize_exact_evalutation_queue();
-  void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos);
-  LSH* index_allocate(char* indexName, bool load_hashTables);
+  void insertPowerData(unsigned n, int powerfd, double *powerdata);
   void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
   void initialize_allowed_keys(std::ifstream*); // implementation of restrict lists using STL "set" class
   int is_in_allowed_keys(Uns32T trackID); // test method for allowed_keys used during search
@@ -460,14 +387,11 @@
     lshfid(0),					\
     forWrite(false),				\
     infid(0),					\
-    db(0),					\
-    indata(0),					\
     dbH(0),					\
+    adb(0),                                     \
     rng(0),                                     \
     fileTable(0),				\
     trackTable(0),				\
-    trackOffsetTable(0),                        \
-    dataBuf(0),					\
     l2normTable(0),				\
     timesTable(0),				\
     powerTable(0),                              \
@@ -476,7 +400,6 @@
     powerFileNameTable(0),                      \
     fileTableLength(0),				\
     trackTableLength(0),			\
-    dataBufLength(0),				\
     timesTableLength(0),			\
     powerTableLength(0),			\
     l2normTableLength(0),			\
@@ -502,18 +425,15 @@
     timesTol(0.1),				\
     radius(0),					\
     query_from_key(false),                      \
-    query_from_key_index(O2_ERR_KEYNOTFOUND),   \
+    query_from_key_index((uint32_t) -1),        \
     use_absolute_threshold(false),		\
     absolute_threshold(0.0),			\
     use_relative_threshold(false),		\
     relative_threshold(0.0),			\
     reporter(0),                                \
-    exact_evaluation_queue(0),                  \
     allowed_keys(0),                            \
     lisztOffset(0),                             \
     lisztLength(0),                             \
-    apierrortemp(0),                            \
-    UseApiError(0),                             \
     lsh(0),					\
     lsh_in_core(false),				\
     lsh_use_u_functions(false),                 \
@@ -523,7 +443,5 @@
     lsh_param_m(0),				\
     lsh_param_N(0),				\
     lsh_param_b(0),				\
-    lsh_param_ncols(0),                         \
-    lsh_n_point_bits(0),                        \
-    vv(0)
+    lsh_param_ncols(0)
 #endif