mas01cr@0
|
1 /* audioDB.h
|
mas01cr@0
|
2
|
mas01cr@0
|
3 audioDB version 1.0
|
mas01cr@0
|
4
|
mas01cr@0
|
5 An efficient feature-vector database management system (FVDBMS) for
|
mas01cr@0
|
6 content-based multimedia search and retrieval.
|
mas01cr@0
|
7
|
mas01cr@0
|
8 Usage: audioDB [OPTIONS]...
|
mas01cr@0
|
9
|
mas01cr@0
|
10 --full-help Print help, including hidden options, and exit
|
mas01cr@0
|
11 -V, --version Print version and exit
|
mas01cr@0
|
12 -H, --help print help on audioDB usage and exit.
|
mas01cr@0
|
13
|
mas01cr@0
|
14 Database Setup:
|
mas01cr@0
|
15 These commands require a database argument.
|
mas01cr@0
|
16 -d, --database=filename database name to be used with database commands
|
mas01cr@0
|
17 -N, --new make a new database
|
mas01cr@0
|
18 -S, --status database information
|
mas01mc@18
|
19 -D, --dump list all tracks: index key size
|
mas01cr@0
|
20
|
mas01cr@0
|
21 Database Insertion:
|
mas01cr@0
|
22 The following commands process a binary input feature file and optional
|
mas01cr@0
|
23 associated key.
|
mas01cr@0
|
24 -I, --insert add feature vectors to an existing database
|
mas01cr@0
|
25 -f, --features=filename binary series of vectors file
|
mas01cr@0
|
26 -t, --times=filename list of time points (ascii) for feature vectors
|
mas01cr@0
|
27 -k, --key=identifier unique identifier associated with features
|
mas01cr@0
|
28
|
mas01cr@0
|
29 Batch Commands:
|
mas01cr@0
|
30 These batch commands require a list of feature vector filenames in a text
|
mas01cr@0
|
31 file and optional list of keys in a text file.
|
mas01cr@0
|
32 -B, --batchinsert add feature vectors named in a featureList file
|
mas01cr@0
|
33 (with optional keys in a keyList file) to the
|
mas01cr@0
|
34 named database
|
mas01cr@0
|
35 -F, --featureList=filename text file containing list of binary feature
|
mas01cr@0
|
36 vector files to process
|
mas01cr@0
|
37 -T, --timesList=filename text file containing list of ascii time-point
|
mas01cr@0
|
38 files for each feature vector file named in
|
mas01cr@0
|
39 featureList
|
mas01cr@0
|
40 -K, --keyList=filename text file containing list of unique identifiers
|
mas01cr@0
|
41 to associate with list of feature files
|
mas01cr@0
|
42
|
mas01cr@0
|
43 Database Search:
|
mas01cr@0
|
44 These commands control the behaviour of retrieval from a named database.
|
mas01cr@0
|
45 -Q, --query perform a content-based search on the named
|
mas01cr@0
|
46 database using the named feature vector file
|
mas01cr@0
|
47 as a query
|
mas01cr@0
|
48 -q, --qtype=type the type of search (possible values="point",
|
mas01mc@18
|
49 "track", "sequence" default=`sequence')
|
mas01cr@0
|
50 -p, --qpoint=position ordinal position of query vector (or start of
|
mas01cr@0
|
51 sequence) in feature vector input file
|
mas01cr@0
|
52 (default=`0')
|
mas01cr@0
|
53 -n, --pointnn=numpoints number of point nearest neighbours to use [per
|
mas01mc@18
|
54 track in track and sequence mode]
|
mas01cr@0
|
55 (default=`10')
|
mas01cr@0
|
56 -r, --resultlength=length maximum length of the result list
|
mas01cr@0
|
57 (default=`10')
|
mas01cr@0
|
58 -l, --sequencelength=length length of sequences for sequence search
|
mas01cr@0
|
59 (default=`16')
|
mas01cr@0
|
60 -h, --sequencehop=hop hop size of sequence window for sequence search
|
mas01cr@0
|
61 (default=`1')
|
mas01cr@0
|
62
|
mas01cr@0
|
63 Web Services:
|
mas01cr@0
|
64 These commands enable the database process to establish a connection via the
|
mas01cr@0
|
65 internet and operate as separate client and server processes.
|
mas01cr@0
|
66 -s, --server=port run as standalone web service on named port
|
mas01cr@0
|
67 (default=`80011')
|
mas01cr@0
|
68 -c, --client=hostname:port run as a client using named host service
|
mas01cr@0
|
69
|
mas01cr@0
|
70 */
|
mas01cr@0
|
71
|
mas01cr@0
|
72
|
mas01cr@0
|
73 #include <stdio.h>
|
mas01cr@0
|
74 #include <stdlib.h>
|
mas01cr@0
|
75 #include <sys/types.h>
|
mas01cr@0
|
76 #include <sys/stat.h>
|
mas01cr@0
|
77 #include <sys/mman.h>
|
mas01cr@0
|
78 #include <fcntl.h>
|
mas01cr@0
|
79 #include <string.h>
|
mas01cr@0
|
80 #include <iostream>
|
mas01cr@0
|
81 #include <fstream>
|
mas01cr@0
|
82 #include <math.h>
|
mas01cr@0
|
83 #include <sys/time.h>
|
mas01cr@0
|
84 #include <assert.h>
|
mas01cr@62
|
85 #include <float.h>
|
mas01cr@0
|
86
|
mas01cr@0
|
87 // includes for web services
|
mas01cr@0
|
88 #include "soapH.h"
|
mas01cr@0
|
89 #include "adb.nsmap"
|
mas01cr@0
|
90 #include "cmdline.h"
|
mas01cr@0
|
91
|
mas01cr@0
|
92 #define MAXSTR 512 // fixed string-length constant; no use of it is visible in this header
|
mas01cr@0
|
93
|
mas01cr@0
|
94 // Database PRIMARY commands: upper-case option-style strings, one per top-level action.
|
mas01cr@0
|
95 #define COM_CREATE "--NEW" // usage text: -N, --new
|
mas01cr@0
|
96 #define COM_INSERT "--INSERT" // usage text: -I, --insert
|
mas01cr@0
|
97 #define COM_BATCHINSERT "--BATCHINSERT" // usage text: -B, --batchinsert
|
mas01cr@0
|
98 #define COM_QUERY "--QUERY" // usage text: -Q, --query
|
mas01cr@0
|
99 #define COM_STATUS "--STATUS" // usage text: -S, --status
|
mas01cr@0
|
100 #define COM_L2NORM "--L2NORM" // NOTE(review): no matching option in the usage text at the top of this file (possibly a hidden option) -- confirm
|
mas01cr@0
|
101 #define COM_DUMP "--DUMP" // usage text: -D, --dump
|
mas01cr@0
|
102 #define COM_SERVER "--SERVER" // usage text: -s, --server=port
|
mas01cr@0
|
103
|
mas01cr@0
|
104 // parameters: lower-case long-option strings matching the option names in the usage text above
|
mas01cr@0
|
105 #define COM_CLIENT "--client"
|
mas01cr@0
|
106 #define COM_DATABASE "--database"
|
mas01cr@0
|
107 #define COM_QTYPE "--qtype"
|
mas01cr@0
|
108 #define COM_SEQLEN "--sequencelength"
|
mas01cr@0
|
109 #define COM_SEQHOP "--sequencehop"
|
mas01cr@0
|
110 #define COM_POINTNN "--pointnn"
|
mas01mc@18
|
111 #define COM_TRACKNN "--resultlength" // NOTE: macro name says TRACKNN but the option string is --resultlength
|
mas01cr@0
|
112 #define COM_QPOINT "--qpoint"
|
mas01cr@0
|
113 #define COM_FEATURES "--features"
|
mas01cr@0
|
114 #define COM_QUERYKEY "--key" // NOTE: macro name says QUERYKEY but the option string is --key
|
mas01cr@0
|
115 #define COM_KEYLIST "--keyList"
|
mas01cr@0
|
116 #define COM_TIMES "--times"
|
mas01cr@0
|
117
|
mas01cr@0
|
118 #define O2_MAGIC 1111765583 // == 0x4244324F: 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order
|
mas01cr@0
|
119
|
mas01cr@0
|
120 #define O2_DEFAULT_POINTNN (10U) // matches the --pointnn default=`10' in the usage text
|
mas01mc@18
|
121 #define O2_DEFAULT_TRACKNN (10U) // matches the --resultlength default=`10' in the usage text
|
mas01cr@0
|
122
|
mas01mc@7
|
123 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size (2*10^9, decimal; unsuffixed literal, below 2^31-1)
|
mas01mc@7
|
124 //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
|
mas01cr@0
|
125
|
mas01cr@0
|
126 //#define O2_MAXFILES (1000000)
|
mas01cr@0
|
127 #define O2_MAXFILES (10000U) // 10,000 files
|
mas01cr@0
|
128 #define O2_MAXFILESTR (256U) // presumably the maximum length of one file name/key string -- confirm against users
|
mas01cr@0
|
129 #define O2_FILETABLESIZE (O2_MAXFILESTR) // alias of O2_MAXFILESTR (256U)
|
mas01mc@18
|
130 #define O2_TRACKTABLESIZE (sizeof(unsigned)) // one unsigned; trackTable below is an unsigned* -- presumably the per-entry size, confirm
|
mas01cr@0
|
131 #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) // dbTableHeaderT is declared later in this file; fine for a macro, which expands at the point of use
|
mas01cr@0
|
132 #define O2_MEANNUMVECTORS (1000U)
|
mas01cr@0
|
133 #define O2_MAXDIM (1000U)
|
mas01mc@17
|
134 #define O2_MAXNN (10000U)
|
mas01cr@0
|
135
|
mas01cr@0
|
136 // Flags: six distinct single-bit unsigned masks (0x1 .. 0x20), so they can be OR-ed together.
|
mas01cr@0
|
137 #define O2_FLAG_L2NORM (0x1U) // bit 0
|
mas01cr@0
|
138 #define O2_FLAG_MINMAX (0x2U) // bit 1
|
mas01cr@0
|
139 #define O2_FLAG_POINT_QUERY (0x4U) // bit 2; usage text lists "point" as a --qtype value
|
mas01cr@0
|
140 #define O2_FLAG_SEQUENCE_QUERY (0x8U) // bit 3; usage text lists "sequence" as a --qtype value
|
mas01mc@18
|
141 #define O2_FLAG_TRACK_QUERY (0x10U) // bit 4; usage text lists "track" as a --qtype value
|
mas01cr@0
|
142 #define O2_FLAG_TIMES (0x20U) // bit 5
|
mas01cr@0
|
143
|
mas01cr@0
|
144 // Error Codes
|
mas01cr@0
|
145 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // no U suffix; with a 32-bit int this hex literal has type unsigned int. Presumably the sentinel returned by getKeyPos() -- confirm
|
mas01cr@0
|
146
|
mas01cr@0
|
147 // Macros
|
mas01cr@0
|
148 #define O2_ACTION(a) (strcmp(command,a)==0) // true when `command` equals string a; needs an identifier `command` in scope at the use site. NOTE(review): argument `a` is not parenthesized
|
mas01cr@0
|
149
|
mas01cr@0
|
150 using namespace std; // NOTE(review): using-directive at global scope in a header affects every file that includes it
|
mas01cr@0
|
151
|
mas01cr@0
|
152 // Database table header: five unsigned fields. NOTE(review): this comment originally read "64 byte header", but the fields declared here total 5*sizeof(unsigned) -- confirm the intended size.
|
mas01cr@0
|
153 typedef struct dbTableHeader{
|
mas01cr@0
|
154 unsigned magic; // presumably compared against O2_MAGIC -- confirm
|
mas01cr@0
|
155 unsigned numFiles;
|
mas01cr@0
|
156 unsigned dim;
|
mas01cr@0
|
157 unsigned length;
|
mas01cr@0
|
158 unsigned flags; // presumably a combination of O2_FLAG_* bits -- confirm
|
mas01cr@0
|
159 } dbTableHeaderT, *dbTableHeaderPtr; // value typedef plus pointer typedef (used by O2_HEADERSIZE and audioDB::dbH)
|
mas01cr@0
|
160
|
mas01cr@0
|
161
|
mas01cr@0
|
162 class audioDB{ // Declares the database object: private state and query helpers, then the public command entry points. Only declarations are visible here.
|
mas01cr@0
|
163
|
mas01cr@0
|
164 private:
|
mas01cr@0
|
165 gengetopt_args_info args_info; // option struct; type presumably comes from cmdline.h (included above) -- confirm
|
mas01cr@0
|
166 unsigned dim; // NOTE(review): dbTableHeader also has a dim field; the relationship is not shown in this header
|
mas01cr@0
|
167 const char *dbName; // NOTE(review): ownership/lifetime of the const char* members below is not visible here
|
mas01cr@0
|
168 const char *inFile;
|
mas01cr@0
|
169 const char *hostport;
|
mas01cr@0
|
170 const char *key;
|
mas01mc@18
|
171 const char* trackFileName;
|
mas01mc@18
|
172 ifstream *trackFile;
|
mas01cr@0
|
173 const char *command; // the identifier the O2_ACTION macro compares against
|
mas01cr@0
|
174 const char *timesFileName;
|
mas01cr@0
|
175 ifstream *timesFile;
|
mas01cr@0
|
176
|
mas01cr@0
|
177 int dbfid; // presumably the database file descriptor -- confirm
|
mas01cr@0
|
178 int infid; // presumably the input file descriptor -- confirm
|
mas01cr@0
|
179 char* db; // presumably the base address of the mapped database (sys/mman.h is included) -- confirm
|
mas01cr@0
|
180 char* indata; // presumably the base address of the mapped input file -- confirm
|
mas01cr@0
|
181 struct stat statbuf;
|
mas01cr@0
|
182 dbTableHeaderPtr dbH; // pointer to a dbTableHeaderT (typedef above)
|
mas01cr@0
|
183 size_t fileTableOffset; // the five size_t offsets below presumably locate each table within the database -- confirm
|
mas01mc@18
|
184 size_t trackTableOffset;
|
mas01cr@0
|
185 size_t dataoffset;
|
mas01cr@0
|
186 size_t l2normTableOffset;
|
mas01cr@0
|
187 size_t timesTableOffset;
|
mas01cr@0
|
188
|
mas01cr@0
|
189 char *fileTable; // typed table/buffer pointers; what they point into is not shown in this header
|
mas01mc@18
|
190 unsigned* trackTable;
|
mas01cr@0
|
191 double* dataBuf;
|
mas01cr@0
|
192 double* inBuf;
|
mas01cr@0
|
193 double* l2normTable;
|
mas01cr@0
|
194 double* qNorm;
|
mas01cr@0
|
195 double* sNorm;
|
mas01cr@0
|
196 double* timesTable;
|
mas01cr@0
|
197
|
mas01cr@0
|
198 // Flags and parameters
|
mas01cr@0
|
199 unsigned verbosity; // how much do we want to know?
|
mas01cr@0
|
200 unsigned queryType; // point queries default. NOTE(review): the usage text above gives --qtype default=`sequence' -- confirm which is right
|
mas01cr@0
|
201 unsigned pointNN; // how many point NNs ?
|
mas01mc@18
|
202 unsigned trackNN; // how many track NNs ?
|
mas01cr@0
|
203 unsigned sequenceLength; // presumably set from --sequencelength -- confirm
|
mas01cr@0
|
204 unsigned sequenceHop; // presumably set from --sequencehop -- confirm
|
mas01cr@0
|
205 unsigned queryPoint; // presumably set from --qpoint -- confirm
|
mas01cr@0
|
206 unsigned usingQueryPoint; // declared unsigned, not bool; presumably used as a boolean -- confirm (same for usingTimes, isClient, isServer)
|
mas01cr@0
|
207 unsigned usingTimes;
|
mas01cr@0
|
208 unsigned isClient;
|
mas01cr@0
|
209 unsigned isServer;
|
mas01cr@0
|
210 unsigned port;
|
mas01cr@0
|
211 double timesTol;
|
mas01mc@17
|
212 double radius;
|
mas01mc@17
|
213
|
mas01cr@0
|
214 // Timers
|
mas01cr@0
|
215 struct timeval tv1; // struct timeval from sys/time.h (included above)
|
mas01cr@0
|
216 struct timeval tv2;
|
mas01cr@0
|
217
|
mas01cr@0
|
218 // private methods
|
mas01cr@32
|
219 void error(const char* a, const char* b = "", const char *sysFunc = 0); // b defaults to "", sysFunc defaults to null
|
mas01cr@0
|
220 void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); // the four query helpers share one signature; adbQueryResult defaults to null
|
mas01mc@18
|
221 void trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01mc@20
|
222 void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01mc@20
|
223 void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); // "Rad" presumably refers to the radius member -- confirm
|
mas01cr@0
|
224
|
mas01cr@27
|
225 void initTables(const char* dbName, bool forWrite, const char* inFile);
|
mas01cr@0
|
226 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); // parameter qNorm has the same name as the qNorm member
|
mas01cr@0
|
227 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); // parameter dim has the same name as the dim member
|
mas01cr@0
|
228 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata); // parameter timesFile has the same name as the timesFile member
|
mas01cr@0
|
229 unsigned getKeyPos(char* key); // NOTE(review): takes non-const char* although the key member is const char*; presumably returns O2_ERR_KEYNOTFOUND on a miss -- confirm
|
mas01cr@0
|
230 public:
|
mas01cr@0
|
231
|
mas01cr@0
|
232 audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0); // adbQueryResult defaults to null
|
mas01cr@0
|
233 ~audioDB();
|
mas01cr@0
|
234 int processArgs(const unsigned argc, char* const argv[]);
|
mas01cr@30
|
235 void get_lock(int fd, bool exclusive);
|
mas01cr@30
|
236 void release_lock(int fd);
|
mas01cr@0
|
237 void create(const char* dbName);
|
mas01cr@0
|
238 void drop();
|
mas01cr@0
|
239 void insert(const char* dbName, const char* inFile);
|
mas01cr@0
|
240 void batchinsert(const char* dbName, const char* inFile);
|
mas01cr@0
|
241 void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); // adbQueryResult defaults to null
|
mas01cr@0
|
242 void status(const char* dbName);
|
mas01cr@0
|
243 void ws_status(const char*dbName, char* hostport); // NOTE(review): hostport is non-const here but const char* in ws_query
|
mas01mc@18
|
244 void ws_query(const char*dbName, const char *trackKey, const char* hostport);
|
mas01cr@0
|
245 void l2norm(const char* dbName);
|
mas01cr@0
|
246 void dump(const char* dbName);
|
mas01cr@0
|
247
|
mas01cr@0
|
248 // web services
|
mas01cr@0
|
249 void startServer();
|
mas01cr@0
|
250
|
mas01cr@0
|
251 };
|
mas01mc@17
|
252
|