Mercurial > hg > audiodb
diff audioDB.h @ 324:c93be2f3a674
Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support; turn it on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also fixes a bug in LSH indexing that incorrectly thresholded large numbers of shingles.
author | mas01mc |
---|---|
date | Thu, 21 Aug 2008 21:28:33 +0000 |
parents | b671a46873c2 |
children | 7ff56cce3297 |
line wrap: on
line diff
--- a/audioDB.h Tue Aug 12 14:25:51 2008 +0000 +++ b/audioDB.h Thu Aug 21 21:28:33 2008 +0000 @@ -80,16 +80,20 @@ #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size // Bit masks for packing (trackID,pointID) into 32-bit unsigned int -#define LSH_N_POINT_BITS 14 -#define LSH_TRACK_MASK 0xFFFFC000U // 2^18 = 262144 tracks -#define LSH_POINT_MASK 0x00003FFFU // 2^14 = 16384 points per track +// This can be controlled at compile time +#define O2_DEFAULT_LSH_N_POINT_BITS 14 + +// Override the default point bit width for large database support +#ifndef LSH_N_POINT_BITS +#define LSH_N_POINT_BITS O2_DEFAULT_LSH_N_POINT_BITS +#endif // LIMIT PARAMETERS #define O2_DEFAULT_DATASIZE (1355U) // in MB #define O2_DEFAULT_NTRACKS (20000U) #define O2_DEFAULT_DATADIM (9U) #define O2_REALTYPE (double) -#define O2_MAXFILES (20000U) +#define O2_MAXFILES (1000000U) #define O2_MAXFILESTR (256U) #define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR) #define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned)) @@ -98,17 +102,21 @@ #define O2_MAXDIM (2000U) #define O2_MAXNN (1000000U) #define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence -#define O2_MAXTRACKS (10000U) // maximum number of tracks -#define O2_MAXTRACKLEN ((LSH_POINT_MASK+1)) // maximum shingles in a track +#define O2_MAXTRACKS (1000000U) // maximum number of tracks +#define O2_MAXTRACKLEN (1<<LSH_N_POINT_BITS) // maximum shingles in a track #define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB #define O2_DISTANCE_TOLERANCE (1e-6) -#define O2_SERIAL_MAX_TRACKBATCH (10000) +#define O2_SERIAL_MAX_TRACKBATCH (1000000) +#define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes) +#define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally +#define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS ) // Flags #define O2_FLAG_L2NORM (0x1U) #define O2_FLAG_MINMAX (0x2U) #define O2_FLAG_POWER (0x4U) 
#define O2_FLAG_TIMES (0x20U) +#define O2_FLAG_LARGE_ADB (0x40U) #define DISPLAY_FLAG(x) (x?"on":"off") // Query types @@ -146,7 +154,17 @@ fflush(stderr); \ } +// We will only use this in a 32-bit address space +// So map the off_t down to 32-bits first +#define INSERT_FILETABLE_STRING(TABLE, STR) \ + strncpy(TABLE + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR)); + +#define SAFE_DELETE(PTR) delete PTR; PTR=0; +#define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0; + extern LSH* SERVER_LSH_INDEX_SINGLETON; +extern char* SERVER_ADB_ROOT; +extern char* SERVER_ADB_FEATURE_ROOT; typedef struct dbTableHeader { uint32_t magic; @@ -192,8 +210,10 @@ std::ifstream *timesFile; const char *powerFileName; std::ifstream *powerFile; + const char* adb_root; + const char* adb_feature_root; + int powerfd; - int dbfid; int lshfid; bool forWrite; @@ -205,15 +225,19 @@ gsl_rng *rng; - char *fileTable; + char* fileTable; unsigned* trackTable; - off_t *trackOffsetTable; + off_t* trackOffsetTable; double* dataBuf; double* inBuf; double* l2normTable; double* timesTable; double* powerTable; + char* featureFileNameTable; + char* timesFileNameTable; + char* powerFileNameTable; + size_t fileTableLength; size_t trackTableLength; off_t dataBufLength; @@ -269,7 +293,7 @@ void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD); void delete_arrays(int track, unsigned int numVectors, double **D, double **DD); - void read_data(int track, double **data_buffer_p, size_t *data_buffer_size_p); + void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p); void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp); void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex); void set_up_db(double **snp, double **vsnp, double 
**spp, double **vspp, double **mddp, unsigned int *dvp); @@ -278,7 +302,7 @@ double dot_product_points(double* q, double* p, Uns32T L); void initRNG(); void initDBHeader(const char *dbName); - void initInputFile(const char *inFile); + void initInputFile(const char *inFile, bool loadData = true); void initTables(const char* dbName, const char* inFile = 0); void initTablesFromKey(const char* dbName, const Uns32T queryIndex); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); @@ -286,6 +310,8 @@ void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void insertPowerData(unsigned n, int powerfd, double *powerdata); unsigned getKeyPos(char* key); + void prefix_name(char** const name, const char* prefix); + public: audioDB(const unsigned argc, char* const argv[]); audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse); @@ -301,6 +327,7 @@ void insert_data_vectors(off_t offset, void *buffer, size_t size); void insert(const char* dbName, const char* inFile); void batchinsert(const char* dbName, const char* inFile); + void batchinsert_large_adb(const char* dbName, const char* inFile); void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); void status(const char* dbName, adb__statusResponse *adbStatusResponse=0); unsigned random_track(unsigned *propTable, unsigned total); @@ -322,6 +349,8 @@ Uns32T lsh_param_N; // Number of rows per hash table Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row + Uns32T lsh_n_point_bits; // How many bits to use to encode point ID within a track + // LSH vector<> containers for one in-core copy of a set of feature vectors vector<float>::iterator vi; // feature vector iterator @@ -342,13 +371,14 @@ char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength); static void index_add_point_approximate(void* instance, Uns32T 
pointID, Uns32T qpos, float dist); // static point reporter callback method static void index_add_point_exact(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method - static Uns32T index_to_trackID(Uns32T lshID); // Convert lsh point index to audioDB trackID - static Uns32T index_to_trackPos(Uns32T lshID); // Convert lsh point index to audioDB trackPos (spos) - static Uns32T index_from_trackInfo(Uns32T, Uns32T); // Convert audioDB trackID and trackPos to an lsh point index + static Uns32T index_to_trackID(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackID + static Uns32T index_to_trackPos(Uns32T lshID, Uns32T nPntBits); // Convert lsh point index to audioDB trackPos (spos) + static Uns32T index_from_trackInfo(Uns32T trackID, Uns32T pntID, Uns32T nPntBits); // Convert audioDB trackID and trackPos to an lsh point index void initialize_exact_evalutation_queue(); void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos); LSH* index_allocate(char* indexName, bool load_hashTables); - + void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); + // Web Services void startServer(); void ws_status(const char*dbName, char* hostport); @@ -370,7 +400,9 @@ timesFile(0), \ powerFileName(0), \ powerFile(0), \ - powerfd(0), \ + adb_root(0), \ + adb_feature_root(0), \ + powerfd(0), \ dbfid(0), \ lshfid(0), \ forWrite(false), \ @@ -386,6 +418,9 @@ l2normTable(0), \ timesTable(0), \ powerTable(0), \ + featureFileNameTable(0), \ + timesFileNameTable(0), \ + powerFileNameTable(0), \ fileTableLength(0), \ trackTableLength(0), \ dataBufLength(0), \ @@ -431,5 +466,6 @@ lsh_param_N(0), \ lsh_param_b(0), \ lsh_param_ncols(0), \ + lsh_n_point_bits(0), \ vv(0) #endif