// audioDB.h -- revision-annotated listing (each original source line is
// prefixed with "<user>@<revision>:").
//
// Contents, in order: include guard and includes; command-line option strings
// (COM_*); default and limit constants (O2_*); header-flag and query-type bit
// values; helper macros; the audioDB class declaration; and
// O2_AUDIODB_INITIALIZERS, a member-initializer list for that class.
//
// NOTE(review): this listing is damaged and is not compilable as it stands:
//   - the targets of the system #include directives are missing;
//   - the text from the middle of ALIGN_UP up to the tail of a macro that
//     calls fprintf(stderr, __VA_ARGS__) / fflush(stderr) is missing;
//   - the template arguments of the vector parameter of
//     index_insert_shingles are missing.
//   Restore these from the upstream file rather than guessing.
//
// NOTE(review): hazards visible in the surviving text:
//   - INSERT_FILETABLE_STRING copies exactly strlen(STR) bytes with strncpy,
//     so it writes no terminating NUL and is not bounded by
//     O2_FILETABLE_ENTRY_SIZE. It also reads `dbH` from the expansion scope
//     and already ends in a semicolon. Presumably the destination entry is
//     pre-zeroed and STR is shorter than one entry -- confirm at call sites.
//   - The two-line comment about a 32-bit address space / off_t sits directly
//     before INSERT_FILETABLE_STRING, which does not use off_t; it probably
//     belongs to a macro in the missing span -- confirm.
//   - SAFE_DELETE and SAFE_DELETE_ARRAY expand to two statements, so under an
//     unbraced `if` only the delete is conditional.
//   - O2_REALTYPE is defined as a parenthesised type name, so
//     sizeof(O2_REALTYPE) in O2_MAXDOTPRODUCTMEMORY expands to
//     sizeof((double)); this looks ill-formed if the macro is ever expanded
//     -- confirm.
//   - O2_ACTION reads a variable named `command` from the expansion scope;
//     DISPLAY_FLAG does not parenthesise its argument.
//   - O2_AUDIODB_INITIALIZERS does not mention hostport, lsh_param_w,
//     args_info or statbuf; presumably these are assigned elsewhere before
//     use -- confirm.
//   - The include-guard name starts with a double underscore, a form reserved
//     for the implementation in C++.
mas01mc@292: #ifndef __AUDIODB_H_ mas01mc@292: #define __AUDIODB_H_ mas01mc@292: mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@657: #include mas01cr@650: #if !defined(WIN32) mas01cr@0: #include mas01cr@650: #endif mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@302: #include mas01cr@498: #include mas01cr@302: #include mas01cr@0: #include mas01cr@657: #include mas01cr@0: #include mas01cr@62: #include mas01cr@104: #include mas01cr@0: mas01mc@292: // includes for LSH indexing mas01cr@498: extern "C" { mas01cr@498: #include "audioDB_API.h" mas01cr@498: } mas01cr@509: #include "audioDB-internals.h" mas01mc@292: #include "ReporterBase.h" mas01cr@498: #include "accumulator.h" mas01mc@292: #include "lshlib.h" mas01mc@292: mas01cr@0: // includes for web services mas01cr@0: #include "soapH.h" mas01cr@0: #include "cmdline.h" mas01cr@0: mas01cr@509: #define MAXSTR ADB_MAXSTR mas01cr@0: mas01cr@0: // Database PRIMARY commands mas01cr@0: #define COM_CREATE "--NEW" mas01cr@0: #define COM_INSERT "--INSERT" mas01cr@0: #define COM_BATCHINSERT "--BATCHINSERT" mas01cr@0: #define COM_QUERY "--QUERY" mas01cr@0: #define COM_STATUS "--STATUS" mas01cr@0: #define COM_L2NORM "--L2NORM" mas01cr@193: #define COM_POWER "--POWER" mas01cr@0: #define COM_DUMP "--DUMP" mas01cr@0: #define COM_SERVER "--SERVER" mas01mc@292: #define COM_INDEX "--INDEX" mas01cr@280: #define COM_SAMPLE "--SAMPLE" mas01mc@334: #define COM_LISZT "--LISZT" mas01cr@0: mas01cr@0: // parameters mas01cr@0: #define COM_CLIENT "--client" mas01cr@0: #define COM_DATABASE "--database" mas01cr@0: #define COM_QTYPE "--qtype" mas01cr@0: #define COM_SEQLEN "--sequencelength" mas01cr@0: #define COM_SEQHOP "--sequencehop" mas01cr@0: #define COM_POINTNN "--pointnn" mas01mc@307: #define COM_RADIUS "--radius" mas01mc@18: #define COM_TRACKNN "--resultlength" mas01cr@0: #define COM_QPOINT "--qpoint" mas01cr@0: #define COM_FEATURES "--features" mas01cr@0: #define 
COM_QUERYKEY "--key" mas01cr@0: #define COM_KEYLIST "--keyList" mas01cr@0: #define COM_TIMES "--times" mas01cr@193: #define COM_QUERYPOWER "--power" mas01cr@193: #define COM_RELATIVE_THRESH "--relative-threshold" mas01cr@193: #define COM_ABSOLUTE_THRESH "--absolute-threshold" mas01mc@310: #define COM_EXHAUSTIVE "--exhaustive" mas01mc@310: #define COM_LSH_EXACT "--lsh_exact" mas01mc@471: #define COM_NO_UNIT_NORMING "--no_unit_norming" mas01mc@768: #define COM_DISTANCE_KULLBACK "--distance_kullback" mas01cr@0: mas01cr@0: #define O2_DEFAULT_POINTNN (10U) mas01mc@18: #define O2_DEFAULT_TRACKNN (10U) mas01cr@0: mas01mc@248: //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size mas01mc@7: #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size mas01cr@0: mas01cr@509: #define O2_DEFAULT_DATASIZE (1355U) /* in MB */ mas01cr@509: #define O2_DEFAULT_NTRACKS (20000U) mas01cr@509: #define O2_DEFAULT_DATADIM (9U) mas01mc@295: mas01mc@295: // LIMIT PARAMETERS mas01mc@292: #define O2_REALTYPE (double) mas01mc@324: #define O2_MAXFILES (1000000U) mas01cr@509: #define O2_MAXFILESTR ADB_FILETABLE_ENTRY_SIZE mas01cr@509: #define O2_FILETABLE_ENTRY_SIZE ADB_FILETABLE_ENTRY_SIZE mas01cr@509: #define O2_TRACKTABLE_ENTRY_SIZE ADB_TRACKTABLE_ENTRY_SIZE mas01cr@0: #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) mas01cr@0: #define O2_MEANNUMVECTORS (1000U) mas01mc@464: #define O2_MAXDIM (20000U) mas01mc@263: #define O2_MAXNN (1000000U) mas01mc@292: #define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence mas01mc@324: #define O2_MAXTRACKS (1000000U) // maximum number of tracks mas01mc@534: mas01mc@292: #define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB mas01mc@324: #define O2_SERIAL_MAX_TRACKBATCH (1000000) mas01mc@324: #define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes) mas01mc@324: #define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept 
externally mas01mc@324: #define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS ) mas01cr@0: mas01cr@0: // Flags mas01cr@509: #define O2_FLAG_L2NORM ADB_HEADER_FLAG_L2NORM mas01cr@0: #define O2_FLAG_MINMAX (0x2U) mas01cr@509: #define O2_FLAG_POWER ADB_HEADER_FLAG_POWER mas01cr@509: #define O2_FLAG_TIMES ADB_HEADER_FLAG_TIMES mas01cr@509: #define O2_FLAG_LARGE_ADB ADB_HEADER_FLAG_REFERENCES mas01mc@301: #define DISPLAY_FLAG(x) (x?"on":"off") mas01cr@0: mas01cr@105: // Query types mas01cr@105: #define O2_POINT_QUERY (0x4U) mas01cr@105: #define O2_SEQUENCE_QUERY (0x8U) mas01cr@105: #define O2_TRACK_QUERY (0x10U) mas01mc@248: #define O2_N_SEQUENCE_QUERY (0x20U) mas01mc@263: #define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U) mas01mc@248: mas01cr@0: // Error Codes mas01cr@0: #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) mas01cr@0: mas01cr@0: // Macros mas01cr@0: #define O2_ACTION(a) (strcmp(command,a)==0) mas01cr@0: mas01cr@370: #define ALIGN_UP(x,w) (((x) + ((1< vv) { \ mas01cr@239: fprintf(stderr, __VA_ARGS__); \ mas01cr@239: fflush(stderr); \ mas01cr@239: } mas01cr@0: mas01mc@324: // We will only use this in a 32-bit address space mas01mc@324: // So map the off_t down to 32-bits first mas01mc@324: #define INSERT_FILETABLE_STRING(TABLE, STR) \ mas01mc@324: strncpy(TABLE + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR)); mas01mc@324: mas01mc@324: #define SAFE_DELETE(PTR) delete PTR; PTR=0; mas01mc@324: #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0; mas01mc@324: mas01mc@324: extern char* SERVER_ADB_ROOT; mas01mc@324: extern char* SERVER_ADB_FEATURE_ROOT; mas01mc@308: mas01mc@308: class audioDB{ mas01cr@0: private: mas01cr@0: gengetopt_args_info args_info; mas01cr@0: unsigned dim; mas01cr@0: const char *dbName; mas01cr@0: const char *inFile; mas01cr@0: const char *hostport; mas01cr@0: const char *key; mas01mc@18: const char* trackFileName; mas01cr@239: std::ifstream *trackFile; mas01cr@0: const char *command; mas01cr@131: const char *output; mas01cr@0: const char 
*timesFileName; mas01cr@239: std::ifstream *timesFile; mas01cr@193: const char *powerFileName; mas01cr@239: std::ifstream *powerFile; mas01mc@324: const char* adb_root; mas01mc@324: const char* adb_feature_root; mas01mc@324: mas01cr@193: int powerfd; mas01cr@0: int dbfid; mas01mc@292: int lshfid; mas01cr@196: bool forWrite; mas01cr@0: int infid; mas01cr@0: struct stat statbuf; mas01cr@673: struct adb_header *dbH; mas01cr@498: struct adb *adb; mas01cr@284: mas01mc@324: char* fileTable; mas01mc@18: unsigned* trackTable; mas01cr@0: double* l2normTable; mas01cr@196: double* timesTable; mas01cr@193: double* powerTable; mas01cr@0: mas01mc@324: char* featureFileNameTable; mas01mc@324: char* timesFileNameTable; mas01mc@324: char* powerFileNameTable; mas01mc@324: mas01cr@196: size_t fileTableLength; mas01cr@196: size_t trackTableLength; mas01cr@196: size_t timesTableLength; mas01cr@196: size_t powerTableLength; mas01cr@196: size_t l2normTableLength; mas01cr@196: mas01cr@0: // Flags and parameters mas01cr@0: unsigned verbosity; // how much do we want to know? mas01cr@256: mas01cr@280: unsigned nsamples; mas01cr@280: mas01cr@256: //off_t size; // given size (for creation) mas01cr@256: unsigned datasize; // size in MB mas01cr@256: unsigned ntracks; mas01cr@256: unsigned datadim; mas01cr@256: mas01cr@0: unsigned queryType; // point queries default mas01cr@0: unsigned pointNN; // how many point NNs ? mas01mc@18: unsigned trackNN; // how many track NNs ? 
mas01cr@0: unsigned sequenceLength; mas01cr@0: unsigned sequenceHop; mas01cr@239: bool normalizedDistance; mas01mc@292: bool no_unit_norming; mas01mc@768: bool distance_kullback; mas01cr@0: unsigned queryPoint; mas01cr@0: unsigned usingQueryPoint; mas01cr@0: unsigned usingTimes; mas01cr@193: unsigned usingPower; mas01cr@0: unsigned isClient; mas01cr@0: unsigned isServer; mas01cr@0: unsigned port; mas01cr@0: double timesTol; mas01mc@17: double radius; mas01mc@292: bool query_from_key; mas01cr@193: bool use_absolute_threshold; mas01cr@193: double absolute_threshold; mas01cr@193: bool use_relative_threshold; mas01cr@193: double relative_threshold; mas01mc@334: mas01mc@292: ReporterBase* reporter; // track/point reporter mas01mc@292: mas01mc@334: // LISZT parameters mas01mc@334: unsigned lisztOffset; mas01mc@334: unsigned lisztLength; mas01mc@334: mas01cr@0: // private methods mas01cr@572: void error(const char* a, const char* b = "", const char *sysFunc = 0) __attribute__ ((noreturn)); mas01cr@193: mas01cr@498: void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); mas01cr@196: void initDBHeader(const char *dbName); mas01cr@498: void initInputFile(const char *inFile); mas01mc@292: void initTables(const char* dbName, const char* inFile = 0); mas01mc@292: void initTablesFromKey(const char* dbName, const Uns32T queryIndex); mas01mc@324: void prefix_name(char** const name, const char* prefix); mas01mc@324: mas01cr@0: public: mas01cr@370: audioDB(const unsigned argc, const char *argv[]); mas01cr@508: audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse); mas01cr@370: audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse); mas01cr@548: audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse); mas01mc@334: mas01cr@97: void cleanup(); mas01cr@0: ~audioDB(); mas01cr@370: int processArgs(const unsigned argc, const char* 
argv[]); mas01cr@0: void create(const char* dbName); mas01cr@0: void insert(const char* dbName, const char* inFile); mas01cr@0: void batchinsert(const char* dbName, const char* inFile); mas01cr@697: void datumFromFiles(adb_datum_t *datum); mas01cr@508: void query(const char* dbName, const char* inFile, struct soap *soap=0, adb__queryResponse *adbQueryResponse=0); mas01cr@133: void status(const char* dbName, adb__statusResponse *adbStatusResponse=0); mas01ik@355: mas01cr@284: unsigned random_track(unsigned *propTable, unsigned total); mas01cr@280: void sample(const char *dbName); mas01cr@0: void l2norm(const char* dbName); mas01cr@193: void power_flag(const char *dbName); mas01cr@0: void dump(const char* dbName); mas01cr@548: void liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap=0, adb__lisztResponse* adbLisztResponse=0); mas01cr@0: mas01mc@292: // LSH indexing parameters and data structures mas01mc@292: LSH* lsh; mas01mc@292: bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false) mas01mc@292: bool lsh_use_u_functions; mas01mc@292: bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH mas01mc@308: bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident mas01mc@292: double lsh_param_w; // Width of LSH hash-function bins mas01mc@292: Uns32T lsh_param_k; // Number of independent hash functions mas01mc@292: Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables mas01mc@292: Uns32T lsh_param_N; // Number of rows per hash table mas01mc@292: Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration mas01mc@292: Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row mas01mc@292: mas01mc@292: // LSH indexing and retrieval methods mas01mc@292: void index_index_db(const char* dbName); mas01mc@292: void index_initialize(double**,double**,double**,double**,unsigned int*); mas01mc@292: void 
index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); mas01mc@292: int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp); mas01mc@292: Uns32T index_insert_shingles(vector >*, Uns32T trackID, double* spp); mas01cr@498: void insertPowerData(unsigned n, int powerfd, double *powerdata); mas01mc@324: void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); mas01mc@324: mas01mc@292: // Web Services mas01cr@0: void startServer(); mas01ik@355: mas01mc@308: void ws_status(const char*dbName, char* hostport); mas01mc@308: void ws_query(const char*dbName, const char *featureFileName, const char* hostport); mas01mc@328: void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport); mas01mc@334: void ws_liszt(const char* dbName, char* hostport); mas01mc@334: mas01cr@0: }; mas01mc@17: mas01mc@292: #define O2_AUDIODB_INITIALIZERS \ mas01mc@292: dim(0), \ mas01mc@292: dbName(0), \ mas01mc@292: inFile(0), \ mas01mc@292: key(0), \ mas01mc@292: trackFileName(0), \ mas01mc@292: trackFile(0), \ mas01mc@292: command(0), \ mas01mc@292: output(0), \ mas01mc@292: timesFileName(0), \ mas01mc@292: timesFile(0), \ mas01mc@292: powerFileName(0), \ mas01mc@292: powerFile(0), \ mas01mc@324: adb_root(0), \ mas01mc@324: adb_feature_root(0), \ mas01mc@324: powerfd(0), \ mas01mc@292: dbfid(0), \ mas01mc@292: lshfid(0), \ mas01mc@292: forWrite(false), \ mas01mc@292: infid(0), \ mas01mc@292: dbH(0), \ mas01cr@498: adb(0), \ mas01mc@292: fileTable(0), \ mas01mc@292: trackTable(0), \ mas01mc@292: l2normTable(0), \ mas01mc@292: timesTable(0), \ mas01mc@314: powerTable(0), \ mas01mc@324: featureFileNameTable(0), \ mas01mc@324: timesFileNameTable(0), \ mas01mc@324: powerFileNameTable(0), \ mas01mc@292: fileTableLength(0), \ mas01mc@292: trackTableLength(0), \ mas01mc@292: 
timesTableLength(0), \ mas01mc@292: powerTableLength(0), \ mas01mc@292: l2normTableLength(0), \ mas01mc@292: verbosity(1), \ mas01mc@292: nsamples(2000), \ mas01mc@292: datasize(O2_DEFAULT_DATASIZE), \ mas01mc@292: ntracks(O2_DEFAULT_NTRACKS), \ mas01mc@292: datadim(O2_DEFAULT_DATADIM), \ mas01mc@292: queryType(O2_POINT_QUERY), \ mas01mc@292: pointNN(O2_DEFAULT_POINTNN), \ mas01mc@292: trackNN(O2_DEFAULT_TRACKNN), \ mas01mc@292: sequenceLength(16), \ mas01mc@292: sequenceHop(1), \ mas01mc@292: normalizedDistance(true), \ mas01mc@292: no_unit_norming(false), \ mas01mc@768: distance_kullback(false), \ mas01mc@292: queryPoint(0), \ mas01mc@292: usingQueryPoint(0), \ mas01mc@292: usingTimes(0), \ mas01mc@292: usingPower(0), \ mas01mc@292: isClient(0), \ mas01mc@292: isServer(0), \ mas01mc@292: port(0), \ mas01mc@292: timesTol(0.1), \ mas01mc@292: radius(0), \ mas01mc@292: query_from_key(false), \ mas01mc@292: use_absolute_threshold(false), \ mas01mc@292: absolute_threshold(0.0), \ mas01mc@292: use_relative_threshold(false), \ mas01mc@292: relative_threshold(0.0), \ mas01mc@292: reporter(0), \ mas01mc@334: lisztOffset(0), \ mas01mc@334: lisztLength(0), \ mas01mc@292: lsh(0), \ mas01mc@292: lsh_in_core(false), \ mas01mc@292: lsh_use_u_functions(false), \ mas01mc@292: lsh_exact(false), \ mas01mc@308: WS_load_index(false), \ mas01mc@292: lsh_param_k(0), \ mas01mc@292: lsh_param_m(0), \ mas01mc@292: lsh_param_N(0), \ mas01mc@292: lsh_param_b(0), \ mas01cr@498: lsh_param_ncols(0) mas01mc@292: #endif