Mercurial > hg > audiodb
diff audioDB.h @ 0:9eab3e0f3068
Initial import
author | mas01cr |
---|---|
date | Fri, 20 Jul 2007 15:51:39 +0000 |
parents | |
children | bd6bb994081b 69eb22e09772 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/audioDB.h Fri Jul 20 15:51:39 2007 +0000 @@ -0,0 +1,254 @@ +/* audioDB.h + +audioDB version 1.0 + +An efficient feature-vector database management system (FVDBMS) for +content-based multimedia search and retrieval. + +Usage: audioDB [OPTIONS]... + + --full-help Print help, including hidden options, and exit + -V, --version Print version and exit + -H, --help print help on audioDB usage and exit. + +Database Setup: + These commands require a database argument. + -d, --database=filename database name to be used with database commands + -N, --new make a new database + -S, --status database information + -D, --dump list all segments: index key size + +Database Insertion: + The following commands process a binary input feature file and optional + associated key. + -I, --insert add feature vectors to an existing database + -f, --features=filename binary series of vectors file + -t, --times=filename list of time points (ascii) for feature vectors + -k, --key=identifier unique identifier associated with features + +Batch Commands: + These batch commands require a list of feature vector filenames in a text + file and optional list of keys in a text file. + -B, --batchinsert add feature vectors named in a featureList file + (with optional keys in a keyList file) to the + named database + -F, --featureList=filename text file containing list of binary feature + vector files to process + -T, --timesList=filename text file containing list of ascii time-point + files for each feature vector file named in + featureList + -K, --keyList=filename text file containing list of unique identifiers + to associate with list of feature files + +Database Search: + These commands control the behaviour of retrieval from a named database. 
+ -Q, --query perform a content-based search on the named + database using the named feature vector file + as a query + -q, --qtype=type the type of search (possible values="point", + "segment", "sequence" default=`sequence') + -p, --qpoint=position ordinal position of query vector (or start of + sequence) in feature vector input file + (default=`0') + -n, --pointnn=numpoints number of point nearest neighbours to use [per + segment in segment and sequence mode] + (default=`10') + -r, --resultlength=length maximum length of the result list + (default=`10') + -l, --sequencelength=length length of sequences for sequence search + (default=`16') + -h, --sequencehop=hop hop size of sequence window for sequence search + (default=`1') + +Web Services: + These commands enable the database process to establish a connection via the + internet and operate as separate client and server processes. + -s, --server=port run as standalone web service on named port + (default=`80011') [NOTE(review): 80011 is above the 16-bit TCP port maximum of 65535 -- confirm the intended default] + -c, --client=hostname:port run as a client using named host service + +*/ + + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <iostream> +#include <fstream> +#include <math.h> +#include <sys/time.h> +#include <assert.h> + +// includes for web services +#include "soapH.h" +#include "adb.nsmap" +#include "cmdline.h" + +#define MAXSTR 512 + +// Database PRIMARY commands +#define COM_CREATE "--NEW" +#define COM_INSERT "--INSERT" +#define COM_BATCHINSERT "--BATCHINSERT" +#define COM_QUERY "--QUERY" +#define COM_STATUS "--STATUS" +#define COM_L2NORM "--L2NORM" +#define COM_DUMP "--DUMP" +#define COM_SERVER "--SERVER" + +// parameters +#define COM_CLIENT "--client" +#define COM_DATABASE "--database" +#define COM_QTYPE "--qtype" +#define COM_SEQLEN "--sequencelength" +#define COM_SEQHOP "--sequencehop" +#define COM_POINTNN "--pointnn" +#define COM_SEGNN "--resultlength" +#define COM_QPOINT 
"--qpoint" +#define COM_FEATURES "--features" +#define COM_QUERYKEY "--key" +#define COM_KEYLIST "--keyList" +#define COM_TIMES "--times" + +#define O2_MAGIC 1111765583 // 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order + +#define O2_DEFAULT_POINTNN (10U) +#define O2_DEFAULT_SEGNN (10U) + +//#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size +#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size + +//#define O2_MAXFILES (1000000) +#define O2_MAXFILES (10000U) // 10,000 files +#define O2_MAXFILESTR (256U) +#define O2_FILETABLESIZE (O2_MAXFILESTR) +#define O2_SEGTABLESIZE (sizeof(unsigned)) +#define O2_HEADERSIZE (sizeof(dbTableHeaderT)) +#define O2_MEANNUMVECTORS (1000U) +#define O2_MAXDIM (1000U) +#define O2_MAXNN (1000U) + +// Flags +#define O2_FLAG_L2NORM (0x1U) +#define O2_FLAG_MINMAX (0x2U) +#define O2_FLAG_POINT_QUERY (0x4U) +#define O2_FLAG_SEQUENCE_QUERY (0x8U) +#define O2_FLAG_SEG_QUERY (0x10U) +#define O2_FLAG_TIMES (0x20U) + +// Error Codes +#define O2_ERR_KEYNOTFOUND (0xFFFFFF00) + +// Macros +#define O2_ACTION(a) (strcmp(command,a)==0) + +using namespace std; + +// table header: five unsigned fields (NOTE(review): originally labelled "64 byte header", but O2_HEADERSIZE is sizeof(dbTableHeaderT) -- confirm the intended size) +typedef struct dbTableHeader{ + unsigned magic; + unsigned numFiles; + unsigned dim; + unsigned length; + unsigned flags; +} dbTableHeaderT, *dbTableHeaderPtr; + + +class audioDB{ + + private: + gengetopt_args_info args_info; + unsigned dim; + const char *dbName; + const char *inFile; + const char *hostport; + const char *key; + const char* segFileName; + ifstream *segFile; + const char *command; + const char *timesFileName; + ifstream *timesFile; + + int dbfid; + int infid; + char* db; + char* indata; + struct stat statbuf; + dbTableHeaderPtr dbH; + size_t fileTableOffset; + size_t segTableOffset; + size_t dataoffset; + size_t l2normTableOffset; + size_t timesTableOffset; + + char *fileTable; + unsigned* segTable; + double* dataBuf; + double* inBuf; + double* l2normTable; + double* qNorm; + double* sNorm; + double* timesTable; + + // Flags and parameters 
+ unsigned verbosity; // how much do we want to know? + unsigned queryType; // point queries default + unsigned pointNN; // how many point NNs ? + unsigned segNN; // how many seg NNs ? + unsigned sequenceLength; + unsigned sequenceHop; + unsigned queryPoint; + unsigned usingQueryPoint; + unsigned usingTimes; + unsigned isClient; + unsigned isServer; + unsigned port; + double timesTol; + + // Timers + struct timeval tv1; + struct timeval tv2; + + + + + // private methods + void error(const char* a, const char* b = ""); + void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + + void initTables(const char* dbName, const char* inFile); + void NBestMatchedFilter(); + void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); + void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); + void normalize(double* X, int dim, int n); + void normalize(double* X, int dim, int n, double minval, double maxval); + void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata); + unsigned getKeyPos(char* key); + public: + + audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0); + ~audioDB(); + int processArgs(const unsigned argc, char* const argv[]); + void create(const char* dbName); + void drop(); + void insert(const char* dbName, const char* inFile); + void batchinsert(const char* dbName, const char* inFile); + void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void status(const char* dbName); + void ws_status(const char*dbName, char* hostport); + void ws_query(const char*dbName, const char *segKey, const char* hostport); + void l2norm(const 
char* dbName); + void dump(const char* dbName); + void deleteDB(const char* dbName, const char* inFile); + + // web services + void startServer(); + +};