mas01cr@0
|
1 /* audioDB.h
|
mas01cr@0
|
2
|
mas01cr@0
|
3 audioDB version 1.0
|
mas01cr@0
|
4
|
mas01cr@0
|
5 An efficient feature-vector database management system (FVDBMS) for
|
mas01cr@0
|
6 content-based multimedia search and retrieval.
|
mas01cr@0
|
7
|
mas01cr@0
|
8 Usage: audioDB [OPTIONS]...
|
mas01cr@0
|
9
|
mas01cr@0
|
10 --full-help Print help, including hidden options, and exit
|
mas01cr@0
|
11 -V, --version Print version and exit
|
mas01cr@0
|
12 -H, --help print help on audioDB usage and exit.
|
mas01cr@0
|
13
|
mas01cr@0
|
14 Database Setup:
|
mas01cr@0
|
15 These commands require a database argument.
|
mas01cr@0
|
16 -d, --database=filename database name to be used with database commands
|
mas01cr@0
|
17 -N, --new make a new database
|
mas01cr@0
|
18 -S, --status database information
|
mas01cr@0
|
19 -D, --dump list all segments: index key size
|
mas01cr@0
|
20
|
mas01cr@0
|
21 Database Insertion:
|
mas01cr@0
|
22 The following commands process a binary input feature file and optional
|
mas01cr@0
|
23 associated key.
|
mas01cr@0
|
24 -I, --insert add feature vectors to an existing database
|
mas01cr@0
|
25 -f, --features=filename binary series of vectors file
|
mas01cr@0
|
26 -t, --times=filename list of time points (ascii) for feature vectors
|
mas01cr@0
|
27 -k, --key=identifier unique identifier associated with features
|
mas01cr@0
|
28
|
mas01cr@0
|
29 Batch Commands:
|
mas01cr@0
|
30 These batch commands require a list of feature vector filenames in a text
|
mas01cr@0
|
31 file and optional list of keys in a text file.
|
mas01cr@0
|
32 -B, --batchinsert add feature vectors named in a featureList file
|
mas01cr@0
|
33 (with optional keys in a keyList file) to the
|
mas01cr@0
|
34 named database
|
mas01cr@0
|
35 -F, --featureList=filename text file containing list of binary feature
|
mas01cr@0
|
36 vector files to process
|
mas01cr@0
|
37 -T, --timesList=filename text file containing list of ascii time-point
|
mas01cr@0
|
38 files for each feature vector file named in
|
mas01cr@0
|
39 featureList
|
mas01cr@0
|
40 -K, --keyList=filename text file containing list of unique identifiers
|
mas01cr@0
|
41 to associate with list of feature files
|
mas01cr@0
|
42
|
mas01cr@0
|
43 Database Search:
|
mas01cr@0
|
44 These commands control the behaviour of retrieval from a named database.
|
mas01cr@0
|
45 -Q, --query perform a content-based search on the named
|
mas01cr@0
|
46 database using the named feature vector file
|
mas01cr@0
|
47 as a query
|
mas01cr@0
|
48 -q, --qtype=type the type of search (possible values="point",
|
mas01cr@0
|
49 "segment", "sequence" default=`sequence')
|
mas01cr@0
|
50 -p, --qpoint=position ordinal position of query vector (or start of
|
mas01cr@0
|
51 sequence) in feature vector input file
|
mas01cr@0
|
52 (default=`0')
|
mas01cr@0
|
53 -n, --pointnn=numpoints number of point nearest neighbours to use [per
|
mas01cr@0
|
54 segment in segment and sequence mode]
|
mas01cr@0
|
55 (default=`10')
|
mas01cr@0
|
56 -r, --resultlength=length maximum length of the result list
|
mas01cr@0
|
57 (default=`10')
|
mas01cr@0
|
58 -l, --sequencelength=length length of sequences for sequence search
|
mas01cr@0
|
59 (default=`16')
|
mas01cr@0
|
60 -h, --sequencehop=hop hop size of sequence window for sequence search
|
mas01cr@0
|
61 (default=`1')
|
mas01cr@0
|
62
|
mas01cr@0
|
63 Web Services:
|
mas01cr@0
|
64 These commands enable the database process to establish a connection via the
|
mas01cr@0
|
65 internet and operate as separate client and server processes.
|
mas01cr@0
|
66 -s, --server=port run as standalone web service on named port
|
mas01cr@0
|
67 (default=`80011')
|
mas01cr@0
|
68 -c, --client=hostname:port run as a client using named host service
|
mas01cr@0
|
69
|
mas01cr@0
|
70 */
|
mas01cr@0
|
71
|
mas01cr@0
|
72
|
mas01cr@0
|
73 #include <stdio.h>
|
mas01cr@0
|
74 #include <stdlib.h>
|
mas01cr@0
|
75 #include <sys/types.h>
|
mas01cr@0
|
76 #include <sys/stat.h>
|
mas01cr@0
|
77 #include <sys/mman.h>
|
mas01cr@0
|
78 #include <fcntl.h>
|
mas01cr@0
|
79 #include <string.h>
|
mas01cr@0
|
80 #include <iostream>
|
mas01cr@0
|
81 #include <fstream>
|
mas01cr@0
|
82 #include <math.h>
|
mas01cr@0
|
83 #include <sys/time.h>
|
mas01cr@0
|
84 #include <assert.h>
|
mas01cr@0
|
85
|
mas01cr@0
|
86 // includes for web services
|
mas01cr@0
|
87 #include "soapH.h"
|
mas01cr@0
|
88 #include "adb.nsmap"
|
mas01cr@0
|
89 #include "cmdline.h"
|
mas01cr@0
|
90
|
mas01cr@0
|
91 #define MAXSTR 512 // NOTE(review): presumably a maximum string/buffer length; not referenced in the visible part of this header -- confirm at call sites
|
mas01cr@0
|
92
|
mas01cr@0
|
93 // Database PRIMARY commands: upper-case spellings, whereas the usage text at the top of the file lists the long options in lower case (--new, --insert, ...)
|
mas01cr@0
|
94 #define COM_CREATE "--NEW"
|
mas01cr@0
|
95 #define COM_INSERT "--INSERT"
|
mas01cr@0
|
96 #define COM_BATCHINSERT "--BATCHINSERT"
|
mas01cr@0
|
97 #define COM_QUERY "--QUERY"
|
mas01cr@0
|
98 #define COM_STATUS "--STATUS"
|
mas01cr@0
|
99 #define COM_L2NORM "--L2NORM" // NOTE(review): no matching --l2norm entry appears in the usage text above
|
mas01cr@0
|
100 #define COM_DUMP "--DUMP"
|
mas01cr@0
|
101 #define COM_SERVER "--SERVER"
|
mas01cr@0
|
102
|
mas01cr@0
|
103 // parameters: lower-case long-option spellings, each of which appears in the usage text at the top of the file
|
mas01cr@0
|
104 #define COM_CLIENT "--client"
|
mas01cr@0
|
105 #define COM_DATABASE "--database"
|
mas01cr@0
|
106 #define COM_QTYPE "--qtype"
|
mas01cr@0
|
107 #define COM_SEQLEN "--sequencelength"
|
mas01cr@0
|
108 #define COM_SEQHOP "--sequencehop"
|
mas01cr@0
|
109 #define COM_POINTNN "--pointnn"
|
mas01cr@0
|
110 #define COM_SEGNN "--resultlength" // the SEGNN name maps to the --resultlength option string
|
mas01cr@0
|
111 #define COM_QPOINT "--qpoint"
|
mas01cr@0
|
112 #define COM_FEATURES "--features"
|
mas01cr@0
|
113 #define COM_QUERYKEY "--key" // the QUERYKEY name maps to the --key option string
|
mas01cr@0
|
114 #define COM_KEYLIST "--keyList"
|
mas01cr@0
|
115 #define COM_TIMES "--times"
|
mas01cr@0
|
116
|
mas01cr@0
|
117 #define O2_MAGIC 1111765583 // 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order (== 0x4244324F)
|
mas01cr@0
|
118
|
mas01cr@0
|
119 #define O2_DEFAULT_POINTNN (10U) // same value as the --pointnn default (10) in the usage text
|
mas01cr@0
|
120 #define O2_DEFAULT_SEGNN (10U) // same value as the --resultlength default (10) in the usage text
|
mas01cr@0
|
121
|
mas01cr@0
|
122 //#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
|
mas01cr@0
|
123 #define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
|
mas01cr@0
|
124
|
mas01cr@0
|
125 //#define O2_MAXFILES (1000000)
|
mas01cr@0
|
126 #define O2_MAXFILES (10000U) // 10,000 files
|
mas01cr@0
|
127 #define O2_MAXFILESTR (256U) // NOTE(review): presumably the maximum length of one stored file name/key -- confirm
|
mas01cr@0
|
128 #define O2_FILETABLESIZE (O2_MAXFILESTR) // alias of O2_MAXFILESTR; NOTE(review): presumably the size of one file-table entry -- confirm
|
mas01cr@0
|
129 #define O2_SEGTABLESIZE (sizeof(unsigned)) // size of one unsigned; audioDB::segTable is declared as unsigned*
|
mas01cr@0
|
130 #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) // size of the dbTableHeader struct declared further down in this header
|
mas01cr@0
|
131 #define O2_MEANNUMVECTORS (1000U) // NOTE(review): meaning not visible in this header -- confirm at use sites
|
mas01cr@0
|
132 #define O2_MAXDIM (1000U) // NOTE(review): presumably an upper bound on feature dimension -- confirm
|
mas01cr@0
|
133 #define O2_MAXNN (1000U) // NOTE(review): presumably an upper bound on nearest-neighbour counts -- confirm
|
mas01cr@0
|
134
|
mas01cr@0
|
135 // Flags: distinct single-bit masks (0x1 .. 0x20), so they can be combined with bitwise OR. NOTE(review): which field stores them is not visible in this header
|
mas01cr@0
|
136 #define O2_FLAG_L2NORM (0x1U)
|
mas01cr@0
|
137 #define O2_FLAG_MINMAX (0x2U)
|
mas01cr@0
|
138 #define O2_FLAG_POINT_QUERY (0x4U)
|
mas01cr@0
|
139 #define O2_FLAG_SEQUENCE_QUERY (0x8U)
|
mas01cr@0
|
140 #define O2_FLAG_SEG_QUERY (0x10U)
|
mas01cr@0
|
141 #define O2_FLAG_TIMES (0x20U)
|
mas01cr@0
|
142
|
mas01cr@0
|
143 // Error Codes
|
mas01cr@0
|
144 #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) // no U suffix, but with a 32-bit int this hex literal already has type unsigned int; NOTE(review): presumably the failure value of getKeyPos() -- confirm
|
mas01cr@0
|
145
|
mas01cr@0
|
146 // Macros
|
mas01cr@0
|
147 #define O2_ACTION(a) (strcmp(command,a)==0) // true when the identifier `command` in scope at the expansion site equals string a (case-sensitive strcmp)
|
mas01cr@0
|
148
|
mas01cr@0
|
149 using namespace std; // needed by the unqualified ifstream members of audioDB below; note that, being in a header, it applies to every file that includes it
|
mas01cr@0
|
150
|
mas01cr@0
|
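151 // database table header: five unsigned fields, so O2_HEADERSIZE (sizeof(dbTableHeaderT)) is 20 bytes where unsigned is 4 bytes -- not 64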
|
mas01cr@0
|
152 typedef struct dbTableHeader{ // database table header record; O2_HEADERSIZE is defined as sizeof of this struct
|
mas01cr@0
|
153 unsigned magic; // NOTE(review): presumably holds O2_MAGIC for a valid database -- confirm
|
mas01cr@0
|
154 unsigned numFiles; // NOTE(review): presumably the number of files inserted so far -- confirm
|
mas01cr@0
|
155 unsigned dim; // NOTE(review): presumably the feature-vector dimension -- confirm
|
mas01cr@0
|
156 unsigned length; // NOTE(review): units (bytes vs. vectors) are not visible in this header -- confirm
|
mas01cr@0
|
157 unsigned flags; // NOTE(review): presumably a combination of O2_FLAG_* bits -- confirm
|
mas01cr@0
|
158 } dbTableHeaderT, *dbTableHeaderPtr; // value typedef plus pointer typedef (audioDB::dbH uses the pointer form)
|
mas01cr@0
|
159
|
mas01cr@0
|
160
|
mas01cr@0
|
161 class audioDB{ // declaration only: state plus create/insert/query/status/dump operations and a web-service entry point; member definitions are not in this header
|
mas01cr@0
|
162
|
mas01cr@0
|
163 private:
|
mas01cr@0
|
164 gengetopt_args_info args_info; // type is not declared in this file (cmdline.h is included above); NOTE(review): presumably the parsed command-line options
|
mas01cr@0
|
165 unsigned dim;
|
mas01cr@0
|
166 const char *dbName;
|
mas01cr@0
|
167 const char *inFile;
|
mas01cr@0
|
168 const char *hostport;
|
mas01cr@0
|
169 const char *key;
|
mas01cr@0
|
170 const char* segFileName;
|
mas01cr@0
|
171 ifstream *segFile;
|
mas01cr@0
|
172 const char *command; // O2_ACTION(a) expands to strcmp(command,a), which resolves to this member inside member functions unless a local shadows it
|
mas01cr@0
|
173 const char *timesFileName;
|
mas01cr@0
|
174 ifstream *timesFile;
|
mas01cr@0
|
175
|
mas01cr@0
|
176 int dbfid; // NOTE(review): presumably the file descriptor of the database file -- confirm
|
mas01cr@0
|
177 int infid; // NOTE(review): presumably the file descriptor of the input feature file -- confirm
|
mas01cr@0
|
178 char* db; // NOTE(review): presumably the mapped database image (sys/mman.h is included) -- confirm
|
mas01cr@0
|
179 char* indata; // NOTE(review): presumably the mapped input file -- confirm
|
mas01cr@0
|
180 struct stat statbuf;
|
mas01cr@0
|
181 dbTableHeaderPtr dbH; // pointer to a dbTableHeader (see the typedef above)
|
mas01cr@0
|
182 size_t fileTableOffset;
|
mas01cr@0
|
183 size_t segTableOffset;
|
mas01cr@0
|
184 size_t dataoffset;
|
mas01cr@0
|
185 size_t l2normTableOffset;
|
mas01cr@0
|
186 size_t timesTableOffset;
|
mas01cr@0
|
187
|
mas01cr@0
|
188 char *fileTable;
|
mas01cr@0
|
189 unsigned* segTable; // element type matches O2_SEGTABLESIZE == sizeof(unsigned)
|
mas01cr@0
|
190 double* dataBuf;
|
mas01cr@0
|
191 double* inBuf;
|
mas01cr@0
|
192 double* l2normTable;
|
mas01cr@0
|
193 double* qNorm;
|
mas01cr@0
|
194 double* sNorm;
|
mas01cr@0
|
195 double* timesTable;
|
mas01cr@0
|
196
|
mas01cr@0
|
197 // Flags and parameters
|
mas01cr@0
|
198 unsigned verbosity; // how much do we want to know?
|
mas01cr@0
|
199 unsigned queryType; // point queries default -- NOTE(review): the usage text at the top of this file gives `sequence' as the --qtype default; confirm which is right
|
mas01cr@0
|
200 unsigned pointNN; // how many point NNs ?
|
mas01cr@0
|
201 unsigned segNN; // how many seg NNs ?
|
mas01cr@0
|
202 unsigned sequenceLength;
|
mas01cr@0
|
203 unsigned sequenceHop;
|
mas01cr@0
|
204 unsigned queryPoint;
|
mas01cr@0
|
205 unsigned usingQueryPoint; // declared unsigned; NOTE(review): presumably used as a boolean -- confirm
|
mas01cr@0
|
206 unsigned usingTimes; // declared unsigned; NOTE(review): presumably used as a boolean -- confirm
|
mas01cr@0
|
207 unsigned isClient; // declared unsigned; NOTE(review): presumably used as a boolean -- confirm
|
mas01cr@0
|
208 unsigned isServer; // declared unsigned; NOTE(review): presumably used as a boolean -- confirm
|
mas01cr@0
|
209 unsigned port;
|
mas01cr@0
|
210 double timesTol;
|
mas01cr@0
|
211
|
mas01cr@0
|
212 // Timers
|
mas01cr@0
|
213 struct timeval tv1;
|
mas01cr@0
|
214 struct timeval tv2;
|
mas01cr@0
|
215
|
mas01cr@0
|
216
|
mas01cr@0
|
217
|
mas01cr@0
|
218
|
mas01cr@0
|
219 // private methods (several parameter names below -- dbName, inFile, dim, qNorm, timesFile, key -- coincide with data members; definitions that keep these names shadow the members)
|
mas01cr@0
|
220 void error(const char* a, const char* b = ""); // b defaults to the empty string
|
mas01cr@0
|
221 void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); // adbQueryResult defaults to a null pointer; adb__queryResult is declared outside this file
|
mas01cr@0
|
222 void sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01cr@0
|
223 void segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01cr@0
|
224 void segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01cr@0
|
225
|
mas01cr@0
|
226 void initTables(const char* dbName, const char* inFile);
|
mas01cr@0
|
227 void NBestMatchedFilter();
|
mas01cr@0
|
228 void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
|
mas01cr@0
|
229 void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
|
mas01cr@0
|
230 void normalize(double* X, int dim, int n); // overload without explicit bounds
|
mas01cr@0
|
231 void normalize(double* X, int dim, int n, double minval, double maxval); // overload taking minval/maxval
|
mas01cr@0
|
232 void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
|
mas01cr@0
|
233 unsigned getKeyPos(char* key); // NOTE(review): takes non-const char* although the key member is const char*; presumably returns O2_ERR_KEYNOTFOUND on a miss -- confirm
|
mas01cr@0
|
234 public:
|
mas01cr@0
|
235
|
mas01cr@0
|
236 audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0); // adbQueryResult defaults to a null pointer
|
mas01cr@0
|
237 ~audioDB();
|
mas01cr@0
|
238 int processArgs(const unsigned argc, char* const argv[]);
|
mas01cr@0
|
239 void create(const char* dbName);
|
mas01cr@0
|
240 void drop();
|
mas01cr@0
|
241 void insert(const char* dbName, const char* inFile);
|
mas01cr@0
|
242 void batchinsert(const char* dbName, const char* inFile);
|
mas01cr@0
|
243 void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
|
mas01cr@0
|
244 void status(const char* dbName);
|
mas01cr@0
|
245 void ws_status(const char*dbName, char* hostport); // NOTE(review): hostport is non-const here but const char* in ws_query below
|
mas01cr@0
|
246 void ws_query(const char*dbName, const char *segKey, const char* hostport);
|
mas01cr@0
|
247 void l2norm(const char* dbName);
|
mas01cr@0
|
248 void dump(const char* dbName);
|
mas01cr@0
|
249 void deleteDB(const char* dbName, const char* inFile);
|
mas01cr@0
|
250
|
mas01cr@0
|
251 // web services
|
mas01cr@0
|
252 void startServer();
|
mas01cr@0
|
253
|
mas01cr@0
|
254 };
|