Mercurial > hg > audiodb
comparison audioDB.cpp @ 0:9eab3e0f3068
Initial import
author | mas01cr |
---|---|
date | Fri, 20 Jul 2007 15:51:39 +0000 |
parents | |
children | 12be3560ff0f 69eb22e09772 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9eab3e0f3068 |
---|---|
1 /* audioDB.cpp | |
2 | |
3 audioDB version 1.0 | |
4 | |
5 A feature vector database management system for content-based retrieval. | |
6 | |
7 Usage: audioDB [OPTIONS]... | |
8 | |
9 --full-help Print help, including hidden options, and exit | |
10 -V, --version Print version and exit | |
11 -H, --help print help on audioDB usage and exit. | |
12 -v, --verbosity=detail level of detail of operational information. | |
13 (default=`1') | |
14 | |
15 Database Setup: | |
16 All database operations require a database argument. | |
17 | |
18 Database commands are UPPER CASE. Command options are lower case. | |
19 | |
20 -d, --database=filename database file required by Database commands. | |
21 -N, --NEW make a new (initially empty) database. | |
22 -S, --STATUS output database information to stdout. | |
23 -D, --DUMP output all entries: index key size. | |
24 -L, --L2NORM unit norm vectors and norm all future inserts. | |
25 | |
26 Database Insertion: | |
27 The following commands insert feature files, with optional keys and | |
28 timestamps. | |
29 | |
30 -I, --INSERT add feature vectors to an existing database. | |
31 -U, --UPDATE replace inserted vectors associated with key | |
32 with new input vectors. | |
33 -f, --features=filename binary series of vectors file {int sz:ieee | |
34 double[][sz]:eof}. | |
35 -t, --times=filename list of time points (ascii) for feature vectors. | |
36 -k, --key=identifier unique identifier associated with features. | |
37 | |
38 -B, --BATCHINSERT add feature vectors named in a --featureList | |
39 file (with optional keys in a --keyList file) | |
40 to the named database. | |
41 -F, --featureList=filename text file containing list of binary feature | |
42 vector files to process | |
43 -T, --timesList=filename text file containing list of ascii --times for | |
44 each --features file in --featureList. | |
45 -K, --keyList=filename text file containing list of unique identifiers | |
46 associated with --features. | |
47 | |
48 Database Search: | |
49 These commands control the retrieval behaviour. | |
50 | |
51 -Q, --QUERY=searchtype content-based search on --database using | |
52 --features as a query. Optionally restrict the | |
53 search to those segments identified in a | |
54 --keyList. (possible values="point", | |
55 "segment", "sequence") | |
56 -p, --qpoint=position ordinal position of query start point in | |
57 --features file. (default=`0') | |
58 -e, --exhaustive exhaustive search: iterate through all query | |
59 vectors in search. Overrides --qpoint. | |
60 (default=off) | |
61 -n, --pointnn=numpoints number of point nearest neighbours to use in | |
62 retrieval. (default=`10') | |
63 -R, --radius=DOUBLE radius search, returns all | |
64 points/segments/sequences inside given radius. | |
65 (default=`1.0') | |
66 -x, --expandfactor=DOUBLE time compress/expand factor of result length to | |
67 query length [1.0 .. 100.0]. (default=`1.1') | |
68 -o, --rotate rotate query vectors for rotationally invariant | |
69 search. (default=off) | |
70 -r, --resultlength=length maximum length of the result list. | |
71 (default=`10') | |
72 -l, --sequencelength=length length of sequences for sequence search. | |
73 (default=`16') | |
74 -h, --sequencehop=hop hop size of sequence window for sequence search. | |
75 (default=`1') | |
76 | |
77 Web Services: | |
78 These commands enable the database process to establish a connection via the | |
79 internet and operate as separate client and server processes. | |
80 | |
81 -s, --SERVER=port run as standalone web service on named port. | |
82 (default=`80011') | |
83 -c, --client=hostname:port run as a client using named host service. | |
84 | |
85 Copyright (C) 2007 Michael Casey, Goldsmiths, University of London | |
86 | |
87 outputs: | |
88 | |
89 key1 distance1 qpos1 spos1 | |
90 key2 distance2 qpos2 spos2 | |
91 ... | |
92 keyN distanceN qposN sposN | |
93 | |
94 */ | |
95 | |
96 #include "audioDB.h" | |
97 | |
98 #define O2_DEBUG | |
99 | |
100 void audioDB::error(const char* a, const char* b){ | |
101 cerr << a << ":" << b << endl; | |
102 exit(1); | |
103 } | |
104 | |
105 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): | |
106 dim(0), | |
107 dbName(0), | |
108 inFile(0), | |
109 key(0), | |
110 segFile(0), | |
111 segFileName(0), | |
112 timesFile(0), | |
113 timesFileName(0), | |
114 usingTimes(0), | |
115 command(0), | |
116 dbfid(0), | |
117 db(0), | |
118 dbH(0), | |
119 infid(0), | |
120 indata(0), | |
121 queryType(O2_FLAG_POINT_QUERY), | |
122 verbosity(1), | |
123 pointNN(O2_DEFAULT_POINTNN), | |
124 segNN(O2_DEFAULT_SEGNN), | |
125 segTable(0), | |
126 fileTable(0), | |
127 dataBuf(0), | |
128 l2normTable(0), | |
129 timesTable(0), | |
130 qNorm(0), | |
131 sequenceLength(16), | |
132 sequenceHop(1), | |
133 queryPoint(0), | |
134 usingQueryPoint(0), | |
135 isClient(0), | |
136 isServer(0), | |
137 port(0), | |
138 timesTol(0.1){ | |
139 | |
140 if(processArgs(argc, argv)<0){ | |
141 printf("No command found.\n"); | |
142 cmdline_parser_print_version (); | |
143 if (strlen(gengetopt_args_info_purpose) > 0) | |
144 printf("%s\n", gengetopt_args_info_purpose); | |
145 printf("%s\n", gengetopt_args_info_usage); | |
146 printf("%s\n", gengetopt_args_info_help[1]); | |
147 printf("%s\n", gengetopt_args_info_help[2]); | |
148 printf("%s\n", gengetopt_args_info_help[0]); | |
149 exit(1); | |
150 } | |
151 | |
152 if(O2_ACTION(COM_SERVER)) | |
153 startServer(); | |
154 | |
155 else if(O2_ACTION(COM_CREATE)) | |
156 create(dbName); | |
157 | |
158 else if(O2_ACTION(COM_INSERT)) | |
159 insert(dbName, inFile); | |
160 | |
161 else if(O2_ACTION(COM_BATCHINSERT)) | |
162 batchinsert(dbName, inFile); | |
163 | |
164 else if(O2_ACTION(COM_QUERY)) | |
165 if(isClient) | |
166 ws_query(dbName, inFile, (char*)hostport); | |
167 else | |
168 query(dbName, inFile, adbQueryResult); | |
169 | |
170 else if(O2_ACTION(COM_STATUS)) | |
171 if(isClient) | |
172 ws_status(dbName,(char*)hostport); | |
173 else | |
174 status(dbName); | |
175 | |
176 else if(O2_ACTION(COM_L2NORM)) | |
177 l2norm(dbName); | |
178 | |
179 else if(O2_ACTION(COM_DUMP)) | |
180 dump(dbName); | |
181 | |
182 else | |
183 error("Unrecognized command",command); | |
184 } | |
185 | |
186 audioDB::~audioDB(){ | |
187 // Clean up | |
188 if(indata) | |
189 munmap(indata,statbuf.st_size); | |
190 if(db) | |
191 munmap(db,O2_DEFAULTDBSIZE); | |
192 if(dbfid>0) | |
193 close(dbfid); | |
194 if(infid>0) | |
195 close(infid); | |
196 if(dbH) | |
197 delete dbH; | |
198 } | |
199 | |
200 int audioDB::processArgs(const unsigned argc, char* const argv[]){ | |
201 | |
202 if(argc<2){ | |
203 cmdline_parser_print_version (); | |
204 if (strlen(gengetopt_args_info_purpose) > 0) | |
205 printf("%s\n", gengetopt_args_info_purpose); | |
206 printf("%s\n", gengetopt_args_info_usage); | |
207 printf("%s\n", gengetopt_args_info_help[1]); | |
208 printf("%s\n", gengetopt_args_info_help[2]); | |
209 printf("%s\n", gengetopt_args_info_help[0]); | |
210 exit(0); | |
211 } | |
212 | |
213 if (cmdline_parser (argc, argv, &args_info) != 0) | |
214 exit(1) ; | |
215 | |
216 if(args_info.help_given){ | |
217 cmdline_parser_print_help(); | |
218 exit(0); | |
219 } | |
220 | |
221 if(args_info.verbosity_given){ | |
222 verbosity=args_info.verbosity_arg; | |
223 if(verbosity<0 || verbosity>10){ | |
224 cerr << "Warning: verbosity out of range, setting to 1" << endl; | |
225 verbosity=1; | |
226 } | |
227 } | |
228 | |
229 if(args_info.SERVER_given){ | |
230 command=COM_SERVER; | |
231 port=args_info.SERVER_arg; | |
232 if(port<100 || port > 100000) | |
233 error("port out of range"); | |
234 isServer=1; | |
235 return 0; | |
236 } | |
237 | |
238 // No return on client command, find database command | |
239 if(args_info.client_given){ | |
240 command=COM_CLIENT; | |
241 hostport=args_info.client_arg; | |
242 isClient=1; | |
243 } | |
244 | |
245 if(args_info.NEW_given){ | |
246 command=COM_CREATE; | |
247 dbName=args_info.database_arg; | |
248 return 0; | |
249 } | |
250 | |
251 if(args_info.STATUS_given){ | |
252 command=COM_STATUS; | |
253 dbName=args_info.database_arg; | |
254 return 0; | |
255 } | |
256 | |
257 if(args_info.DUMP_given){ | |
258 command=COM_DUMP; | |
259 dbName=args_info.database_arg; | |
260 return 0; | |
261 } | |
262 | |
263 if(args_info.L2NORM_given){ | |
264 command=COM_L2NORM; | |
265 dbName=args_info.database_arg; | |
266 return 0; | |
267 } | |
268 | |
269 if(args_info.INSERT_given){ | |
270 command=COM_INSERT; | |
271 dbName=args_info.database_arg; | |
272 inFile=args_info.features_arg; | |
273 if(args_info.key_given) | |
274 key=args_info.key_arg; | |
275 if(args_info.times_given){ | |
276 timesFileName=args_info.times_arg; | |
277 if(strlen(timesFileName)>0){ | |
278 if(!(timesFile = new ifstream(timesFileName,ios::in))) | |
279 error("Could not open times file for reading", timesFileName); | |
280 usingTimes=1; | |
281 } | |
282 } | |
283 return 0; | |
284 } | |
285 | |
286 if(args_info.BATCHINSERT_given){ | |
287 command=COM_BATCHINSERT; | |
288 dbName=args_info.database_arg; | |
289 inFile=args_info.featureList_arg; | |
290 if(args_info.keyList_given) | |
291 key=args_info.keyList_arg; // INCONSISTENT NO CHECK | |
292 | |
293 /* TO DO: REPLACE WITH | |
294 if(args_info.keyList_given){ | |
295 segFileName=args_info.keyList_arg; | |
296 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) | |
297 error("Could not open keyList file for reading",segFileName); | |
298 } | |
299 AND UPDATE BATCHINSERT() | |
300 */ | |
301 | |
302 if(args_info.timesList_given){ | |
303 timesFileName=args_info.timesList_arg; | |
304 if(strlen(timesFileName)>0){ | |
305 if(!(timesFile = new ifstream(timesFileName,ios::in))) | |
306 error("Could not open timesList file for reading", timesFileName); | |
307 usingTimes=1; | |
308 } | |
309 } | |
310 return 0; | |
311 } | |
312 | |
313 // Query command and arguments | |
314 if(args_info.QUERY_given){ | |
315 command=COM_QUERY; | |
316 dbName=args_info.database_arg; | |
317 inFile=args_info.features_arg; | |
318 | |
319 if(args_info.keyList_given){ | |
320 segFileName=args_info.keyList_arg; | |
321 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) | |
322 error("Could not open keyList file for reading",segFileName); | |
323 } | |
324 | |
325 if(args_info.times_given){ | |
326 timesFileName=args_info.times_arg; | |
327 if(strlen(timesFileName)>0){ | |
328 if(!(timesFile = new ifstream(timesFileName,ios::in))) | |
329 error("Could not open times file for reading", timesFileName); | |
330 usingTimes=1; | |
331 } | |
332 } | |
333 | |
334 // query type | |
335 if(strncmp(args_info.QUERY_arg, "segment", MAXSTR)==0) | |
336 queryType=O2_FLAG_SEG_QUERY; | |
337 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) | |
338 queryType=O2_FLAG_POINT_QUERY; | |
339 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) | |
340 queryType=O2_FLAG_SEQUENCE_QUERY; | |
341 else | |
342 error("unsupported query type",args_info.QUERY_arg); | |
343 | |
344 if(!args_info.exhaustive_flag){ | |
345 queryPoint = args_info.qpoint_arg; | |
346 usingQueryPoint=1; | |
347 if(queryPoint<0 || queryPoint >10000) | |
348 error("queryPoint out of range: 0 <= queryPoint <= 10000"); | |
349 } | |
350 | |
351 | |
352 pointNN=args_info.pointnn_arg; | |
353 if(pointNN<1 || pointNN >1000) | |
354 error("pointNN out of range: 1 <= pointNN <= 1000"); | |
355 | |
356 | |
357 | |
358 segNN=args_info.resultlength_arg; | |
359 if(segNN<1 || segNN >1000) | |
360 error("resultlength out of range: 1 <= resultlength <= 1000"); | |
361 | |
362 | |
363 sequenceLength=args_info.sequencelength_arg; | |
364 if(sequenceLength<1 || sequenceLength >1000) | |
365 error("seqlen out of range: 1 <= seqlen <= 1000"); | |
366 | |
367 sequenceHop=args_info.sequencehop_arg; | |
368 if(sequenceHop<1 || sequenceHop >1000) | |
369 error("seqhop out of range: 1 <= seqhop <= 1000"); | |
370 | |
371 return 0; | |
372 } | |
373 return -1; // no command found | |
374 } | |
375 | |
376 /* Make a new database | |
377 | |
378 The database consists of: | |
379 | |
380 header | |
381 --------------------------------------------------------------------------------- | |
382 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | | |
383 --------------------------------------------------------------------------------- | |
384 | |
385 | |
386 keyTable : list of keys of segments | |
387 -------------------------------------------------------------------------- | |
388 | key 256 bytes | | |
389 -------------------------------------------------------------------------- | |
390 O2_MAXFILES*O2_FILENAMELENGTH | |
391 | |
392 segTable : Maps implicit feature index to a feature vector matrix | |
393 -------------------------------------------------------------------------- | |
394 | numVectors (4 bytes) | | |
395 -------------------------------------------------------------------------- | |
396 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) | |
397 | |
398 featureTable | |
399 -------------------------------------------------------------------------- | |
400 | v1 v2 v3 ... vd (double) | | |
401 -------------------------------------------------------------------------- | |
402 O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) | |
403 | |
404 timesTable | |
405 -------------------------------------------------------------------------- | |
406 | timestamp (double) | | |
407 -------------------------------------------------------------------------- | |
408 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) | |
409 | |
410 l2normTable | |
411 -------------------------------------------------------------------------- | |
412 | nm (double) | | |
413 -------------------------------------------------------------------------- | |
414 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) | |
415 | |
416 */ | |
417 | |
418 void audioDB::create(const char* dbName){ | |
419 if ((dbfid = open (dbName, O_RDWR | O_CREAT | O_TRUNC)) < 0) | |
420 error("Can't open database file:", dbName); | |
421 | |
422 // go to the location corresponding to the last byte | |
423 if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1) | |
424 error("lseek error in db file"); | |
425 | |
426 // write a dummy byte at the last location | |
427 if (write (dbfid, "", 1) != 1) | |
428 error("write error"); | |
429 | |
430 // mmap the output file | |
431 if(verbosity) | |
432 cerr << "header size:" << O2_HEADERSIZE << endl; | |
433 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | |
434 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
435 error("mmap error for creating database"); | |
436 | |
437 dbH = new dbTableHeaderT(); | |
438 assert(dbH); | |
439 | |
440 // Initialize header | |
441 dbH->magic=O2_MAGIC; | |
442 dbH->numFiles=0; | |
443 dbH->length=0; | |
444 dbH->dim=0; | |
445 dbH->flags=0; //O2_FLAG_L2NORM; | |
446 | |
447 memcpy (db, dbH, O2_HEADERSIZE); | |
448 if(verbosity) | |
449 cerr << COM_CREATE << " " << dbName << endl; | |
450 | |
451 } | |
452 | |
453 | |
454 void audioDB::drop(){ | |
455 | |
456 | |
457 } | |
458 | |
459 // initTables - memory map files passed as arguments | |
460 // Precondition: database has already been created | |
461 void audioDB::initTables(const char* dbName, const char* inFile=0){ | |
462 if ((dbfid = open (dbName, O_RDWR)) < 0) | |
463 error("Can't open database file:", dbName); | |
464 | |
465 // open the input file | |
466 if (inFile && (infid = open (inFile, O_RDONLY)) < 0) | |
467 error("can't open feature file for reading", inFile); | |
468 | |
469 // find size of input file | |
470 if (inFile && fstat (infid,&statbuf) < 0) | |
471 error("fstat error finding size of input"); | |
472 | |
473 // Get the database header info | |
474 dbH = new dbTableHeaderT(); | |
475 assert(dbH); | |
476 | |
477 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) | |
478 error("error reading db header"); | |
479 | |
480 fileTableOffset = O2_HEADERSIZE; | |
481 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | |
482 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; | |
483 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
484 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
485 | |
486 if(dbH->magic!=O2_MAGIC){ | |
487 cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; | |
488 error("database file has incorrect header",dbName); | |
489 } | |
490 | |
491 if(inFile) | |
492 if(dbH->dim==0 && dbH->length==0) // empty database | |
493 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality | |
494 else { | |
495 unsigned test; | |
496 read(infid,&test,sizeof(unsigned)); | |
497 if(dbH->dim!=test){ | |
498 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl; | |
499 error("feature dimensions do not match database table dimensions"); | |
500 } | |
501 } | |
502 | |
503 // mmap the input file | |
504 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) | |
505 == (caddr_t) -1) | |
506 error("mmap error for input"); | |
507 | |
508 // mmap the database file | |
509 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | |
510 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
511 error("mmap error for creating database"); | |
512 | |
513 // Make some handy tables with correct types | |
514 fileTable= (char*)(db+fileTableOffset); | |
515 segTable = (unsigned*)(db+segTableOffset); | |
516 dataBuf = (double*)(db+dataoffset); | |
517 l2normTable = (double*)(db+l2normTableOffset); | |
518 timesTable = (double*)(db+timesTableOffset); | |
519 | |
520 } | |
521 | |
522 void audioDB::insert(const char* dbName, const char* inFile){ | |
523 | |
524 initTables(dbName, inFile); | |
525 | |
526 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
527 error("Must use timestamps with timestamped database","use --times"); | |
528 | |
529 // Check that there is room for at least 1 more file | |
530 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) | |
531 error("No more room in database","insert failed: reason database is full."); | |
532 | |
533 if(!key) | |
534 key=inFile; | |
535 // Linear scan of filenames check for pre-existing feature | |
536 unsigned alreadyInserted=0; | |
537 for(unsigned k=0; k<dbH->numFiles; k++) | |
538 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ | |
539 alreadyInserted=1; | |
540 break; | |
541 } | |
542 | |
543 if(alreadyInserted){ | |
544 if(verbosity) | |
545 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl; | |
546 return; | |
547 } | |
548 | |
549 // Make a segment index table of features to file indexes | |
550 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
551 if(!numVectors){ | |
552 if(verbosity) | |
553 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; | |
554 // CLEAN UP | |
555 munmap(indata,statbuf.st_size); | |
556 munmap(db,O2_DEFAULTDBSIZE); | |
557 close(infid); | |
558 return; | |
559 } | |
560 | |
561 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | |
562 | |
563 unsigned insertoffset = dbH->length;// Store current state | |
564 | |
565 // Check times status and insert times from file | |
566 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | |
567 double* timesdata=timesTable+timesoffset; | |
568 assert(timesdata+numVectors<l2normTable); | |
569 insertTimeStamps(numVectors, timesFile, timesdata); | |
570 | |
571 // Increment file count | |
572 dbH->numFiles++; | |
573 | |
574 // Update Header information | |
575 dbH->length+=(statbuf.st_size-sizeof(int)); | |
576 | |
577 // Copy the header back to the database | |
578 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
579 | |
580 // Update segment to file index map | |
581 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | |
582 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
583 | |
584 // Update the feature database | |
585 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | |
586 | |
587 // Norm the vectors on input if the database is already L2 normed | |
588 if(dbH->flags & O2_FLAG_L2NORM) | |
589 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append | |
590 | |
591 // Report status | |
592 status(dbName); | |
593 if(verbosity) | |
594 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | |
595 << (statbuf.st_size-sizeof(int)) << " bytes." << endl; | |
596 | |
597 // CLEAN UP | |
598 munmap(indata,statbuf.st_size); | |
599 close(infid); | |
600 } | |
601 | |
602 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ | |
603 unsigned numtimes=0; | |
604 if(usingTimes){ | |
605 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) | |
606 dbH->flags=dbH->flags|O2_FLAG_TIMES; | |
607 else if(!(dbH->flags&O2_FLAG_TIMES)){ | |
608 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; | |
609 usingTimes=0; | |
610 } | |
611 | |
612 if(!timesFile->is_open()){ | |
613 if(dbH->flags & O2_FLAG_TIMES){ | |
614 munmap(indata,statbuf.st_size); | |
615 munmap(db,O2_DEFAULTDBSIZE); | |
616 error("problem opening times file on timestamped database",timesFileName); | |
617 } | |
618 else{ | |
619 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl; | |
620 usingTimes=0; | |
621 } | |
622 } | |
623 | |
624 // Process time file | |
625 if(usingTimes){ | |
626 do{ | |
627 *timesFile>>*timesdata++; | |
628 if(timesFile->eof()) | |
629 break; | |
630 numtimes++; | |
631 }while(!timesFile->eof() && numtimes<numVectors); | |
632 if(!timesFile->eof()){ | |
633 double dummy; | |
634 do{ | |
635 *timesFile>>dummy; | |
636 if(timesFile->eof()) | |
637 break; | |
638 numtimes++; | |
639 }while(!timesFile->eof()); | |
640 } | |
641 if(numtimes<numVectors || numtimes>numVectors+2){ | |
642 munmap(indata,statbuf.st_size); | |
643 munmap(db,O2_DEFAULTDBSIZE); | |
644 close(infid); | |
645 cerr << "expected " << numVectors << " found " << numtimes << endl; | |
646 error("Times file is incorrect length for features file",inFile); | |
647 } | |
648 if(verbosity>2) | |
649 cerr << "numtimes: " << numtimes << endl; | |
650 } | |
651 } | |
652 } | |
653 | |
654 void audioDB::batchinsert(const char* dbName, const char* inFile){ | |
655 | |
656 if ((dbfid = open (dbName, O_RDWR)) < 0) | |
657 error("Can't open database file:", dbName); | |
658 | |
659 if(!key) | |
660 key=inFile; | |
661 ifstream *filesIn = 0; | |
662 ifstream *keysIn = 0; | |
663 ifstream* thisTimesFile = 0; | |
664 | |
665 if(!(filesIn = new ifstream(inFile))) | |
666 error("Could not open batch in file", inFile); | |
667 if(key && key!=inFile) | |
668 if(!(keysIn = new ifstream(key))) | |
669 error("Could not open batch key file",key); | |
670 | |
671 // Get the database header info | |
672 dbH = new dbTableHeaderT(); | |
673 assert(dbH); | |
674 | |
675 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) | |
676 error("error reading db header"); | |
677 | |
678 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
679 error("Must use timestamps with timestamped database","use --times"); | |
680 | |
681 fileTableOffset = O2_HEADERSIZE; | |
682 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | |
683 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; | |
684 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
685 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
686 | |
687 if(dbH->magic!=O2_MAGIC){ | |
688 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; | |
689 error("database file has incorrect header",dbName); | |
690 } | |
691 | |
692 | |
693 // mmap the database file | |
694 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | |
695 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
696 error("mmap error for creating database"); | |
697 | |
698 // Make some handy tables with correct types | |
699 fileTable= (char*)(db+fileTableOffset); | |
700 segTable = (unsigned*)(db+segTableOffset); | |
701 dataBuf = (double*)(db+dataoffset); | |
702 l2normTable = (double*)(db+l2normTableOffset); | |
703 timesTable = (double*)(db+timesTableOffset); | |
704 | |
705 unsigned totalVectors=0; | |
706 char *thisKey = new char[MAXSTR]; | |
707 char *thisFile = new char[MAXSTR]; | |
708 char *thisTimesFileName = new char[MAXSTR]; | |
709 | |
710 do{ | |
711 filesIn->getline(thisFile,MAXSTR); | |
712 if(key && key!=inFile) | |
713 keysIn->getline(thisKey,MAXSTR); | |
714 else | |
715 thisKey = thisFile; | |
716 if(usingTimes) | |
717 timesFile->getline(thisTimesFileName,MAXSTR); | |
718 | |
719 if(filesIn->eof()) | |
720 break; | |
721 | |
722 // open the input file | |
723 if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0) | |
724 error("can't open feature file for reading", thisFile); | |
725 | |
726 // find size of input file | |
727 if (thisFile && fstat (infid,&statbuf) < 0) | |
728 error("fstat error finding size of input"); | |
729 | |
730 // Check that there is room for at least 1 more file | |
731 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) | |
732 error("No more room in database","insert failed: reason database is full."); | |
733 | |
734 if(thisFile) | |
735 if(dbH->dim==0 && dbH->length==0) // empty database | |
736 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality | |
737 else { | |
738 unsigned test; | |
739 read(infid,&test,sizeof(unsigned)); | |
740 if(dbH->dim!=test){ | |
741 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl; | |
742 error("feature dimensions do not match database table dimensions"); | |
743 } | |
744 } | |
745 | |
746 // mmap the input file | |
747 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) | |
748 == (caddr_t) -1) | |
749 error("mmap error for input"); | |
750 | |
751 | |
752 // Linear scan of filenames check for pre-existing feature | |
753 unsigned alreadyInserted=0; | |
754 | |
755 for(unsigned k=0; k<dbH->numFiles; k++) | |
756 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ | |
757 alreadyInserted=1; | |
758 break; | |
759 } | |
760 | |
761 if(alreadyInserted){ | |
762 if(verbosity) | |
763 cerr << "Warning: key already exists in database:" << thisKey << endl; | |
764 } | |
765 else{ | |
766 | |
767 // Make a segment index table of features to file indexes | |
768 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
769 if(!numVectors){ | |
770 if(verbosity) | |
771 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | |
772 } | |
773 else{ | |
774 if(usingTimes){ | |
775 if(timesFile->eof()) | |
776 error("not enough timestamp files in timesList"); | |
777 thisTimesFile=new ifstream(thisTimesFileName,ios::in); | |
778 if(!thisTimesFile->is_open()) | |
779 error("Cannot open timestamp file",thisTimesFileName); | |
780 unsigned insertoffset=dbH->length; | |
781 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | |
782 double* timesdata=timesTable+timesoffset; | |
783 assert(timesdata+numVectors<l2normTable); | |
784 insertTimeStamps(numVectors,thisTimesFile,timesdata); | |
785 if(thisTimesFile) | |
786 delete thisTimesFile; | |
787 } | |
788 | |
789 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | |
790 | |
791 unsigned insertoffset = dbH->length;// Store current state | |
792 | |
793 // Increment file count | |
794 dbH->numFiles++; | |
795 | |
796 // Update Header information | |
797 dbH->length+=(statbuf.st_size-sizeof(int)); | |
798 // Copy the header back to the database | |
799 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
800 | |
801 // Update segment to file index map | |
802 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | |
803 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
804 | |
805 // Update the feature database | |
806 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | |
807 | |
808 // Norm the vectors on input if the database is already L2 normed | |
809 if(dbH->flags & O2_FLAG_L2NORM) | |
810 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append | |
811 | |
812 totalVectors+=numVectors; | |
813 } | |
814 } | |
815 // CLEAN UP | |
816 munmap(indata,statbuf.st_size); | |
817 close(infid); | |
818 }while(!filesIn->eof()); | |
819 | |
820 if(verbosity) | |
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | |
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; | |
823 | |
824 // Report status | |
825 status(dbName); | |
826 | |
827 } | |
828 | |
829 void audioDB::ws_status(const char*dbName, char* hostport){ | |
830 struct soap soap; | |
831 int adbStatusResult; | |
832 | |
833 // Query an existing adb database | |
834 soap_init(&soap); | |
835 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK) | |
836 std::cout << "result = " << adbStatusResult << std::endl; | |
837 else | |
838 soap_print_fault(&soap,stderr); | |
839 | |
840 soap_destroy(&soap); | |
841 soap_end(&soap); | |
842 soap_done(&soap); | |
843 } | |
844 | |
845 void audioDB::ws_query(const char*dbName, const char *segKey, const char* hostport){ | |
846 struct soap soap; | |
847 adb__queryResult adbQueryResult; | |
848 | |
849 soap_init(&soap); | |
850 if(soap_call_adb__query(&soap,hostport,NULL, | |
851 (char*)dbName,(char*)segKey,(char*)segFileName,(char*)timesFileName, | |
852 queryType, queryPoint, pointNN, segNN, sequenceLength, adbQueryResult)==SOAP_OK){ | |
853 //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; | |
854 for(int i=0; i<adbQueryResult.__sizeRlist; i++) | |
855 std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i] | |
856 << " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl; | |
857 } | |
858 else | |
859 soap_print_fault(&soap,stderr); | |
860 | |
861 soap_destroy(&soap); | |
862 soap_end(&soap); | |
863 soap_done(&soap); | |
864 | |
865 } | |
866 | |
867 | |
868 void audioDB::status(const char* dbName){ | |
869 if(!dbH) | |
870 initTables(dbName, 0); | |
871 | |
872 // Update Header information | |
873 cout << "num files:" << dbH->numFiles << endl; | |
874 cout << "data dim:" << dbH->dim <<endl; | |
875 if(dbH->dim>0){ | |
876 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; | |
877 cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; | |
878 } | |
879 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl; | |
880 cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" << | |
881 (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl; | |
882 cout << "flags:" << dbH->flags << endl; | |
883 | |
884 unsigned dudCount=0; | |
885 unsigned nullCount=0; | |
886 for(unsigned k=0; k<dbH->numFiles; k++){ | |
887 if(segTable[k]<sequenceLength){ | |
888 dudCount++; | |
889 if(!segTable[k]) | |
890 nullCount++; | |
891 } | |
892 } | |
893 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | |
894 } | |
895 | |
896 | |
897 void audioDB::dump(const char* dbName){ | |
898 if(!dbH) | |
899 initTables(dbName,0); | |
900 | |
901 for(unsigned k=0; k<dbH->numFiles; k++) | |
902 cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl; | |
903 | |
904 status(dbName); | |
905 } | |
906 | |
907 void audioDB::l2norm(const char* dbName){ | |
908 initTables(dbName,0); | |
909 if(dbH->length>0){ | |
910 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); | |
911 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append | |
912 } | |
913 // Update database flags | |
914 dbH->flags = dbH->flags|O2_FLAG_L2NORM; | |
915 memcpy (db, dbH, O2_HEADERSIZE); | |
916 } | |
917 | |
918 | |
919 | |
920 void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | |
921 switch(queryType){ | |
922 case O2_FLAG_POINT_QUERY: | |
923 pointQuery(dbName, inFile, adbQueryResult); | |
924 break; | |
925 case O2_FLAG_SEQUENCE_QUERY: | |
926 segSequenceQuery(dbName, inFile, adbQueryResult); | |
927 break; | |
928 case O2_FLAG_SEG_QUERY: | |
929 segPointQuery(dbName, inFile, adbQueryResult); | |
930 break; | |
931 default: | |
932 error("unrecognized queryType in query()"); | |
933 | |
934 } | |
935 } | |
936 | |
937 //return ordinal position of key in keyTable | |
938 unsigned audioDB::getKeyPos(char* key){ | |
939 for(unsigned k=0; k<dbH->numFiles; k++) | |
940 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) | |
941 return k; | |
942 error("Key not found",key); | |
943 return O2_ERR_KEYNOTFOUND; | |
944 } | |
945 | |
946 // Basic point query engine | |
947 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | |
948 | |
949 initTables(dbName, inFile); | |
950 | |
951 // For each input vector, find the closest pointNN matching output vectors and report | |
952 // we use stdout in this stub version | |
953 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
954 | |
955 double* query = (double*)(indata+sizeof(int)); | |
956 double* data = dataBuf; | |
957 double* queryCopy = 0; | |
958 | |
959 if( dbH->flags & O2_FLAG_L2NORM ){ | |
960 // Make a copy of the query | |
961 queryCopy = new double[numVectors*dbH->dim]; | |
962 qNorm = new double[numVectors]; | |
963 assert(queryCopy&&qNorm); | |
964 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
965 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
966 query = queryCopy; | |
967 } | |
968 | |
969 // Make temporary dynamic memory for results | |
970 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
971 double distances[pointNN]; | |
972 unsigned qIndexes[pointNN]; | |
973 unsigned sIndexes[pointNN]; | |
974 for(unsigned k=0; k<pointNN; k++){ | |
975 distances[k]=0.0; | |
976 qIndexes[k]=~0; | |
977 sIndexes[k]=~0; | |
978 } | |
979 | |
980 unsigned j=numVectors; | |
981 unsigned k,l,n; | |
982 double thisDist; | |
983 | |
984 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); | |
985 double meanQdur = 0; | |
986 double* timesdata = 0; | |
987 double* dbdurs = 0; | |
988 | |
989 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
990 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
991 usingTimes=0; | |
992 } | |
993 | |
994 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
995 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
996 | |
997 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
998 timesdata = new double[numVectors]; | |
999 insertTimeStamps(numVectors, timesFile, timesdata); | |
1000 // Calculate durations of points | |
1001 for(k=0; k<numVectors-1; k++){ | |
1002 timesdata[k]=timesdata[k+1]-timesdata[k]; | |
1003 meanQdur+=timesdata[k]; | |
1004 } | |
1005 meanQdur/=k; | |
1006 // Individual exhaustive timepoint durations | |
1007 dbdurs = new double[totalVecs]; | |
1008 for(k=0; k<totalVecs-1; k++) | |
1009 dbdurs[k]=timesTable[k+1]-timesTable[k]; | |
1010 j--; // decrement vector counter by one | |
1011 } | |
1012 | |
1013 if(usingQueryPoint) | |
1014 if(queryPoint>numVectors-1) | |
1015 error("queryPoint > numVectors in query"); | |
1016 else{ | |
1017 if(verbosity>1) | |
1018 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1019 query=query+queryPoint*dbH->dim; | |
1020 numVectors=queryPoint+1; | |
1021 j=1; | |
1022 } | |
1023 | |
1024 gettimeofday(&tv1, NULL); | |
1025 while(j--){ // query | |
1026 data=dataBuf; | |
1027 k=totalVecs; // number of database vectors | |
1028 while(k--){ // database | |
1029 thisDist=0; | |
1030 l=dbH->dim; | |
1031 double* q=query; | |
1032 while(l--) | |
1033 thisDist+=*q++**data++; | |
1034 if(!usingTimes || | |
1035 (usingTimes | |
1036 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){ | |
1037 n=pointNN; | |
1038 while(n--){ | |
1039 if(thisDist>=distances[n]){ | |
1040 if((n==0 || thisDist<=distances[n-1])){ | |
1041 // Copy all values above up the queue | |
1042 for( l=pointNN-1 ; l >= n+1 ; l--){ | |
1043 distances[l]=distances[l-1]; | |
1044 qIndexes[l]=qIndexes[l-1]; | |
1045 sIndexes[l]=sIndexes[l-1]; | |
1046 } | |
1047 distances[n]=thisDist; | |
1048 qIndexes[n]=numVectors-j-1; | |
1049 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; | |
1050 break; | |
1051 } | |
1052 } | |
1053 else | |
1054 break; | |
1055 } | |
1056 } | |
1057 } | |
1058 // Move query pointer to next query point | |
1059 query+=dbH->dim; | |
1060 } | |
1061 | |
1062 gettimeofday(&tv2, NULL); | |
1063 if(verbosity>1) | |
1064 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1065 | |
1066 if(adbQueryResult==0){ | |
1067 // Output answer | |
1068 // Loop over nearest neighbours | |
1069 for(k=0; k < pointNN; k++){ | |
1070 // Scan for key | |
1071 unsigned cumSeg=0; | |
1072 for(l=0 ; l<dbH->numFiles; l++){ | |
1073 cumSeg+=segTable[l]; | |
1074 if(sIndexes[k]<cumSeg){ | |
1075 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | |
1076 << sIndexes[k]+segTable[l]-cumSeg << endl; | |
1077 break; | |
1078 } | |
1079 } | |
1080 } | |
1081 } | |
1082 else{ // Process Web Services Query | |
1083 int listLen = pointNN; | |
1084 adbQueryResult->__sizeRlist=listLen; | |
1085 adbQueryResult->__sizeDist=listLen; | |
1086 adbQueryResult->__sizeQpos=listLen; | |
1087 adbQueryResult->__sizeSpos=listLen; | |
1088 adbQueryResult->Rlist= new char*[listLen]; | |
1089 adbQueryResult->Dist = new double[listLen]; | |
1090 adbQueryResult->Qpos = new int[listLen]; | |
1091 adbQueryResult->Spos = new int[listLen]; | |
1092 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | |
1093 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | |
1094 adbQueryResult->Dist[k]=distances[k]; | |
1095 adbQueryResult->Qpos[k]=qIndexes[k]; | |
1096 unsigned cumSeg=0; | |
1097 for(l=0 ; l<dbH->numFiles; l++){ | |
1098 cumSeg+=segTable[l]; | |
1099 if(sIndexes[k]<cumSeg){ | |
1100 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | |
1101 break; | |
1102 } | |
1103 } | |
1104 adbQueryResult->Spos[k]=sIndexes[k]+segTable[l]-cumSeg; | |
1105 } | |
1106 } | |
1107 | |
1108 // Clean up | |
1109 if(queryCopy) | |
1110 delete queryCopy; | |
1111 if(qNorm) | |
1112 delete qNorm; | |
1113 if(timesdata) | |
1114 delete timesdata; | |
1115 if(dbdurs) | |
1116 delete dbdurs; | |
1117 } | |
1118 | |
1119 void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | |
1120 | |
1121 } | |
1122 | |
1123 // segPointQuery | |
1124 // return the segNN closest segs to the query seg | |
1125 // uses average of pointNN points per seg | |
1126 void audioDB::segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | |
1127 initTables(dbName, inFile); | |
1128 | |
1129 // For each input vector, find the closest pointNN matching output vectors and report | |
1130 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1131 unsigned numSegs = dbH->numFiles; | |
1132 | |
1133 double* query = (double*)(indata+sizeof(int)); | |
1134 double* data = dataBuf; | |
1135 double* queryCopy = 0; | |
1136 | |
1137 if( dbH->flags & O2_FLAG_L2NORM ){ | |
1138 // Make a copy of the query | |
1139 queryCopy = new double[numVectors*dbH->dim]; | |
1140 qNorm = new double[numVectors]; | |
1141 assert(queryCopy&&qNorm); | |
1142 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1143 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1144 query = queryCopy; | |
1145 } | |
1146 | |
1147 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1148 assert(segNN>0 && segNN<=O2_MAXNN); | |
1149 | |
1150 // Make temporary dynamic memory for results | |
1151 double segDistances[segNN]; | |
1152 unsigned segIDs[segNN]; | |
1153 unsigned segQIndexes[segNN]; | |
1154 unsigned segSIndexes[segNN]; | |
1155 | |
1156 double distances[pointNN]; | |
1157 unsigned qIndexes[pointNN]; | |
1158 unsigned sIndexes[pointNN]; | |
1159 | |
1160 unsigned j=numVectors; // number of query points | |
1161 unsigned k,l,n, seg, segOffset=0, processedSegs=0; | |
1162 double thisDist; | |
1163 | |
1164 for(k=0; k<pointNN; k++){ | |
1165 distances[k]=0.0; | |
1166 qIndexes[k]=~0; | |
1167 sIndexes[k]=~0; | |
1168 } | |
1169 | |
1170 for(k=0; k<segNN; k++){ | |
1171 segDistances[k]=0.0; | |
1172 segQIndexes[k]=~0; | |
1173 segSIndexes[k]=~0; | |
1174 segIDs[k]=~0; | |
1175 } | |
1176 | |
1177 double meanQdur = 0; | |
1178 double* timesdata = 0; | |
1179 double* meanDBdur = 0; | |
1180 | |
1181 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1182 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1183 usingTimes=0; | |
1184 } | |
1185 | |
1186 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1187 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1188 | |
1189 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1190 timesdata = new double[numVectors]; | |
1191 insertTimeStamps(numVectors, timesFile, timesdata); | |
1192 // Calculate durations of points | |
1193 for(k=0; k<numVectors-1; k++){ | |
1194 timesdata[k]=timesdata[k+1]-timesdata[k]; | |
1195 meanQdur+=timesdata[k]; | |
1196 } | |
1197 meanQdur/=k; | |
1198 meanDBdur = new double[dbH->numFiles]; | |
1199 for(k=0; k<dbH->numFiles; k++){ | |
1200 meanDBdur[k]=0.0; | |
1201 for(j=0; j<segTable[k]-1 ; j++) | |
1202 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | |
1203 meanDBdur[k]/=j; | |
1204 } | |
1205 } | |
1206 | |
1207 if(usingQueryPoint) | |
1208 if(queryPoint>numVectors-1) | |
1209 error("queryPoint > numVectors in query"); | |
1210 else{ | |
1211 if(verbosity>1) | |
1212 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1213 query=query+queryPoint*dbH->dim; | |
1214 numVectors=queryPoint+1; | |
1215 } | |
1216 | |
1217 // build segment offset table | |
1218 unsigned *segOffsetTable = new unsigned[dbH->numFiles]; | |
1219 unsigned cumSeg=0; | |
1220 unsigned segIndexOffset; | |
1221 for(k=0; k<dbH->numFiles;k++){ | |
1222 segOffsetTable[k]=cumSeg; | |
1223 cumSeg+=segTable[k]*dbH->dim; | |
1224 } | |
1225 | |
1226 char nextKey[MAXSTR]; | |
1227 | |
1228 gettimeofday(&tv1, NULL); | |
1229 | |
1230 for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){ | |
1231 if(segFile){ | |
1232 if(!segFile->eof()){ | |
1233 //*segFile>>seg; | |
1234 segFile->getline(nextKey,MAXSTR); | |
1235 seg=getKeyPos(nextKey); | |
1236 } | |
1237 else | |
1238 break; | |
1239 } | |
1240 segOffset=segOffsetTable[seg]; // numDoubles offset | |
1241 segIndexOffset=segOffset/dbH->dim; // numVectors offset | |
1242 if(verbosity>7) | |
1243 cerr << seg << "." << segOffset/(dbH->dim) << "." << segTable[seg] << " | ";cerr.flush(); | |
1244 | |
1245 if(dbH->flags & O2_FLAG_L2NORM) | |
1246 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; | |
1247 else | |
1248 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | |
1249 if(usingQueryPoint) | |
1250 j=1; | |
1251 else | |
1252 j=numVectors; | |
1253 while(j--){ | |
1254 k=segTable[seg]; // number of vectors in seg | |
1255 data=dataBuf+segOffset; // data for seg | |
1256 while(k--){ | |
1257 thisDist=0; | |
1258 l=dbH->dim; | |
1259 double* q=query; | |
1260 while(l--) | |
1261 thisDist+=*q++**data++; | |
1262 if(!usingTimes || | |
1263 (usingTimes | |
1264 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ | |
1265 n=pointNN; | |
1266 while(n--){ | |
1267 if(thisDist>=distances[n]){ | |
1268 if((n==0 || thisDist<=distances[n-1])){ | |
1269 // Copy all values above up the queue | |
1270 for( l=pointNN-1 ; l > n ; l--){ | |
1271 distances[l]=distances[l-1]; | |
1272 qIndexes[l]=qIndexes[l-1]; | |
1273 sIndexes[l]=sIndexes[l-1]; | |
1274 } | |
1275 distances[n]=thisDist; | |
1276 qIndexes[n]=numVectors-j-1; | |
1277 sIndexes[n]=segTable[seg]-k-1; | |
1278 break; | |
1279 } | |
1280 } | |
1281 else | |
1282 break; | |
1283 } | |
1284 } | |
1285 } // seg | |
1286 // Move query pointer to next query point | |
1287 query+=dbH->dim; | |
1288 } // query | |
1289 // Take the average of this seg's distance | |
1290 // Test the seg distances | |
1291 thisDist=0; | |
1292 n=pointNN; | |
1293 while(n--) | |
1294 thisDist+=distances[pointNN-n-1]; | |
1295 thisDist/=pointNN; | |
1296 n=segNN; | |
1297 while(n--){ | |
1298 if(thisDist>=segDistances[n]){ | |
1299 if((n==0 || thisDist<=segDistances[n-1])){ | |
1300 // Copy all values above up the queue | |
1301 for( l=pointNN-1 ; l > n ; l--){ | |
1302 segDistances[l]=segDistances[l-1]; | |
1303 segQIndexes[l]=segQIndexes[l-1]; | |
1304 segSIndexes[l]=segSIndexes[l-1]; | |
1305 segIDs[l]=segIDs[l-1]; | |
1306 } | |
1307 segDistances[n]=thisDist; | |
1308 segQIndexes[n]=qIndexes[0]; | |
1309 segSIndexes[n]=sIndexes[0]; | |
1310 segIDs[n]=seg; | |
1311 break; | |
1312 } | |
1313 } | |
1314 else | |
1315 break; | |
1316 } | |
1317 for(unsigned k=0; k<pointNN; k++){ | |
1318 distances[k]=0.0; | |
1319 qIndexes[k]=~0; | |
1320 sIndexes[k]=~0; | |
1321 } | |
1322 } // segs | |
1323 gettimeofday(&tv2, NULL); | |
1324 | |
1325 if(verbosity>1) | |
1326 cerr << endl << "processed segs :" << processedSegs | |
1327 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1328 | |
1329 if(adbQueryResult==0){ | |
1330 if(verbosity>1) | |
1331 cerr<<endl; | |
1332 // Output answer | |
1333 // Loop over nearest neighbours | |
1334 for(k=0; k < min(segNN,processedSegs); k++) | |
1335 cout << fileTable+segIDs[k]*O2_FILETABLESIZE | |
1336 << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl; | |
1337 } | |
1338 else{ // Process Web Services Query | |
1339 int listLen = min(segNN, processedSegs); | |
1340 adbQueryResult->__sizeRlist=listLen; | |
1341 adbQueryResult->__sizeDist=listLen; | |
1342 adbQueryResult->__sizeQpos=listLen; | |
1343 adbQueryResult->__sizeSpos=listLen; | |
1344 adbQueryResult->Rlist= new char*[listLen]; | |
1345 adbQueryResult->Dist = new double[listLen]; | |
1346 adbQueryResult->Qpos = new int[listLen]; | |
1347 adbQueryResult->Spos = new int[listLen]; | |
1348 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | |
1349 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | |
1350 adbQueryResult->Dist[k]=segDistances[k]; | |
1351 adbQueryResult->Qpos[k]=segQIndexes[k]; | |
1352 adbQueryResult->Spos[k]=segSIndexes[k]; | |
1353 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); | |
1354 } | |
1355 } | |
1356 | |
1357 | |
1358 // Clean up | |
1359 if(segOffsetTable) | |
1360 delete segOffsetTable; | |
1361 if(queryCopy) | |
1362 delete queryCopy; | |
1363 if(qNorm) | |
1364 delete qNorm; | |
1365 if(timesdata) | |
1366 delete timesdata; | |
1367 if(meanDBdur) | |
1368 delete meanDBdur; | |
1369 | |
1370 } | |
1371 | |
1372 void audioDB::deleteDB(const char* dbName, const char* inFile){ | |
1373 | |
1374 } | |
1375 | |
1376 // NBest matched filter distance between query and target segs | |
1377 // efficient implementation | |
1378 // outputs average of N minimum matched filter distances | |
1379 void audioDB::segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | |
1380 | |
1381 initTables(dbName, inFile); | |
1382 | |
1383 // For each input vector, find the closest pointNN matching output vectors and report | |
1384 // we use stdout in this stub version | |
1385 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1386 unsigned numSegs = dbH->numFiles; | |
1387 | |
1388 double* query = (double*)(indata+sizeof(int)); | |
1389 double* data = dataBuf; | |
1390 double* queryCopy = 0; | |
1391 | |
1392 double qMeanL2; | |
1393 double* sMeanL2; | |
1394 | |
1395 unsigned USE_THRESH=0; | |
1396 double SILENCE_THRESH=0; | |
1397 double DIFF_THRESH=0; | |
1398 | |
1399 if(!(dbH->flags & O2_FLAG_L2NORM) ) | |
1400 error("Database must be L2 normed for sequence query","use -l2norm"); | |
1401 | |
1402 if(verbosity>1) | |
1403 cerr << "performing norms ... "; cerr.flush(); | |
1404 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | |
1405 // Make a copy of the query | |
1406 queryCopy = new double[numVectors*dbH->dim]; | |
1407 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1408 qNorm = new double[numVectors]; | |
1409 sNorm = new double[dbVectors]; | |
1410 sMeanL2=new double[dbH->numFiles]; | |
1411 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | |
1412 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1413 query = queryCopy; | |
1414 // Make norm measurements relative to sequenceLength | |
1415 unsigned w = sequenceLength-1; | |
1416 unsigned i,j; | |
1417 double* ps; | |
1418 double tmp1,tmp2; | |
1419 // Copy the L2 norm values to core to avoid disk random access later on | |
1420 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | |
1421 double* snPtr = sNorm; | |
1422 for(i=0; i<dbH->numFiles; i++){ | |
1423 if(segTable[i]>sequenceLength){ | |
1424 tmp1=*snPtr; | |
1425 j=1; | |
1426 w=sequenceLength-1; | |
1427 while(w--) | |
1428 *snPtr+=snPtr[j++]; | |
1429 ps = snPtr+1; | |
1430 w=segTable[i]-sequenceLength; // +1 - 1 | |
1431 while(w--){ | |
1432 tmp2=*ps; | |
1433 *ps=*(ps-1)-tmp1+*(ps+sequenceLength); | |
1434 tmp1=tmp2; | |
1435 ps++; | |
1436 } | |
1437 } | |
1438 snPtr+=segTable[i]; | |
1439 } | |
1440 | |
1441 double* pn = sMeanL2; | |
1442 w=dbH->numFiles; | |
1443 while(w--) | |
1444 *pn++=0.0; | |
1445 ps=sNorm; | |
1446 unsigned processedSegs=0; | |
1447 for(i=0; i<dbH->numFiles; i++){ | |
1448 if(segTable[i]>sequenceLength-1){ | |
1449 w = segTable[i]-sequenceLength+1; | |
1450 pn = sMeanL2+i; | |
1451 while(w--) | |
1452 *pn+=*ps++; | |
1453 *pn/=segTable[i]-sequenceLength+1; | |
1454 SILENCE_THRESH+=*pn; | |
1455 processedSegs++; | |
1456 } | |
1457 } | |
1458 if(verbosity>1) | |
1459 cerr << "processedSegs: " << processedSegs << endl; | |
1460 SILENCE_THRESH/=processedSegs; | |
1461 USE_THRESH=1; // Turn thresholding on | |
1462 DIFF_THRESH=SILENCE_THRESH/=2; // 50% of the mean shingle power | |
1463 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE | |
1464 | |
1465 w=sequenceLength-1; | |
1466 i=1; | |
1467 tmp1=*qNorm; | |
1468 while(w--) | |
1469 *qNorm+=qNorm[i++]; | |
1470 ps = qNorm+1; | |
1471 qMeanL2 = *qNorm; | |
1472 w=numVectors-sequenceLength; | |
1473 while(w--){ | |
1474 tmp2=*ps; | |
1475 *ps=*(ps-1)-tmp1+*(ps+sequenceLength); | |
1476 tmp1=tmp2; | |
1477 qMeanL2+=*ps; | |
1478 *ps++; | |
1479 } | |
1480 qMeanL2 /= numVectors-sequenceLength+1; | |
1481 if(verbosity>1) | |
1482 cerr << "done." << endl; | |
1483 | |
1484 | |
1485 if(verbosity>1) | |
1486 cerr << "matching segs..." << endl; | |
1487 | |
1488 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1489 assert(segNN>0 && segNN<=O2_MAXNN); | |
1490 | |
1491 // Make temporary dynamic memory for results | |
1492 double segDistances[segNN]; | |
1493 unsigned segIDs[segNN]; | |
1494 unsigned segQIndexes[segNN]; | |
1495 unsigned segSIndexes[segNN]; | |
1496 | |
1497 double distances[pointNN]; | |
1498 unsigned qIndexes[pointNN]; | |
1499 unsigned sIndexes[pointNN]; | |
1500 | |
1501 | |
1502 unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
1503 double thisDist; | |
1504 double oneOverWL=1.0/wL; | |
1505 | |
1506 for(k=0; k<pointNN; k++){ | |
1507 distances[k]=0.0; | |
1508 qIndexes[k]=~0; | |
1509 sIndexes[k]=~0; | |
1510 } | |
1511 | |
1512 for(k=0; k<segNN; k++){ | |
1513 segDistances[k]=0.0; | |
1514 segQIndexes[k]=~0; | |
1515 segSIndexes[k]=~0; | |
1516 segIDs[k]=~0; | |
1517 } | |
1518 | |
1519 // Timestamp and durations processing | |
1520 double meanQdur = 0; | |
1521 double* timesdata = 0; | |
1522 double* meanDBdur = 0; | |
1523 | |
1524 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1525 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1526 usingTimes=0; | |
1527 } | |
1528 | |
1529 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1530 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1531 | |
1532 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1533 timesdata = new double[numVectors]; | |
1534 assert(timesdata); | |
1535 insertTimeStamps(numVectors, timesFile, timesdata); | |
1536 // Calculate durations of points | |
1537 for(k=0; k<numVectors-1; k++){ | |
1538 timesdata[k]=timesdata[k+1]-timesdata[k]; | |
1539 meanQdur+=timesdata[k]; | |
1540 } | |
1541 meanQdur/=k; | |
1542 if(verbosity>1) | |
1543 cerr << "mean query file duration: " << meanQdur << endl; | |
1544 meanDBdur = new double[dbH->numFiles]; | |
1545 assert(meanDBdur); | |
1546 for(k=0; k<dbH->numFiles; k++){ | |
1547 meanDBdur[k]=0.0; | |
1548 for(j=0; j<segTable[k]-1 ; j++) | |
1549 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | |
1550 meanDBdur[k]/=j; | |
1551 } | |
1552 } | |
1553 | |
1554 if(usingQueryPoint) | |
1555 if(queryPoint>numVectors || queryPoint>numVectors-wL+1) | |
1556 error("queryPoint > numVectors-wL+1 in query"); | |
1557 else{ | |
1558 if(verbosity>1) | |
1559 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1560 query=query+queryPoint*dbH->dim; | |
1561 qNorm=qNorm+queryPoint; | |
1562 numVectors=wL; | |
1563 } | |
1564 | |
1565 double ** D = 0; // Cross-correlation between query and target | |
1566 double ** DD = 0; // Matched filter distance | |
1567 | |
1568 D = new double*[numVectors]; | |
1569 assert(D); | |
1570 DD = new double*[numVectors]; | |
1571 assert(DD); | |
1572 | |
1573 gettimeofday(&tv1, NULL); | |
1574 processedSegs=0; | |
1575 unsigned successfulSegs=0; | |
1576 | |
1577 double* qp; | |
1578 double* sp; | |
1579 double* dp; | |
1580 double diffL2; | |
1581 | |
1582 // build segment offset table | |
1583 unsigned *segOffsetTable = new unsigned[dbH->numFiles]; | |
1584 unsigned cumSeg=0; | |
1585 unsigned segIndexOffset; | |
1586 for(k=0; k<dbH->numFiles;k++){ | |
1587 segOffsetTable[k]=cumSeg; | |
1588 cumSeg+=segTable[k]*dbH->dim; | |
1589 } | |
1590 | |
1591 char nextKey [MAXSTR]; | |
1592 for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){ | |
1593 | |
1594 // get segID from file if using a control file | |
1595 if(segFile){ | |
1596 if(!segFile->eof()){ | |
1597 segFile->getline(nextKey,MAXSTR); | |
1598 seg=getKeyPos(nextKey); | |
1599 } | |
1600 else | |
1601 break; | |
1602 } | |
1603 segOffset=segOffsetTable[seg]; // numDoubles offset | |
1604 segIndexOffset=segOffset/dbH->dim; // numVectors offset | |
1605 | |
1606 if(sequenceLength<segTable[seg]){ // test for short sequences | |
1607 | |
1608 if(verbosity>7) | |
1609 cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush(); | |
1610 | |
1611 // Cross-correlation matrix | |
1612 for(j=0; j<numVectors;j++){ | |
1613 D[j]=new double[segTable[seg]]; | |
1614 assert(D[j]); | |
1615 | |
1616 } | |
1617 | |
1618 // Matched filter matrix | |
1619 for(j=0; j<numVectors;j++){ | |
1620 DD[j]=new double[segTable[seg]]; | |
1621 assert(DD[j]); | |
1622 } | |
1623 | |
1624 // Cross Correlation | |
1625 for(j=0; j<numVectors; j++) | |
1626 for(k=0; k<segTable[seg]; k++){ | |
1627 qp=query+j*dbH->dim; | |
1628 sp=dataBuf+segOffset+k*dbH->dim; | |
1629 DD[j][k]=0.0; // Initialize matched filter array | |
1630 dp=&D[j][k]; // point to correlation cell j,k | |
1631 *dp=0.0; // initialize correlation cell | |
1632 l=dbH->dim; // size of vectors | |
1633 while(l--) | |
1634 *dp+=*qp++**sp++; | |
1635 } | |
1636 | |
1637 // Matched Filter | |
1638 // HOP SIZE == 1 | |
1639 double* spd; | |
1640 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | |
1641 for(w=0; w<wL; w++) | |
1642 for(j=0; j<numVectors-w; j++){ | |
1643 sp=DD[j]; | |
1644 spd=D[j+w]+w; | |
1645 k=segTable[seg]-w; | |
1646 while(k--) | |
1647 *sp+++=*spd++; | |
1648 } | |
1649 } | |
1650 else{ // HOP_SIZE != 1 | |
1651 for(w=0; w<wL; w++) | |
1652 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | |
1653 sp=DD[j]; | |
1654 spd=D[j+w]+w; | |
1655 for(k=0; k<segTable[seg]-w; k+=HOP_SIZE){ | |
1656 *sp+=*spd; | |
1657 sp+=HOP_SIZE; | |
1658 spd+=HOP_SIZE; | |
1659 } | |
1660 } | |
1661 } | |
1662 | |
1663 if(verbosity>3){ | |
1664 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; | |
1665 cerr.flush(); | |
1666 } | |
1667 | |
1668 if(!usingTimes || | |
1669 (usingTimes | |
1670 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ | |
1671 | |
1672 if(verbosity>3){ | |
1673 cerr << "within duration tolerance." << endl; | |
1674 cerr.flush(); | |
1675 } | |
1676 | |
1677 // Search for minimum distance by shingles (concatenated vectors) | |
1678 for(j=0;j<numVectors-wL+1;j+=HOP_SIZE) | |
1679 for(k=0;k<segTable[seg]-wL+1;k+=HOP_SIZE){ | |
1680 | |
1681 diffL2 = fabs(qNorm[j] - sNorm[k]); | |
1682 // Power test | |
1683 if(!USE_THRESH || | |
1684 // Threshold on mean L2 of Q and S sequences | |
1685 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && | |
1686 // Are both query and target windows above mean energy? | |
1687 (qNorm[j]>qMeanL2*.25 && sNorm[k]>sMeanL2[seg]*.25 && diffL2 < DIFF_THRESH ))) | |
1688 thisDist=DD[j][k]*oneOverWL; | |
1689 else | |
1690 thisDist=0.0; | |
1691 | |
1692 // NBest match algorithm | |
1693 for(m=0; m<pointNN; m++){ | |
1694 if(thisDist>=distances[m]){ | |
1695 // Shuffle distances up the list | |
1696 for(l=pointNN-1; l>m; l--){ | |
1697 distances[l]=distances[l-1]; | |
1698 qIndexes[l]=qIndexes[l-1]; | |
1699 sIndexes[l]=sIndexes[l-1]; | |
1700 } | |
1701 distances[m]=thisDist; | |
1702 if(usingQueryPoint) | |
1703 qIndexes[m]=queryPoint; | |
1704 else | |
1705 qIndexes[m]=j; | |
1706 sIndexes[m]=k; | |
1707 break; | |
1708 } | |
1709 } | |
1710 } | |
1711 // Calculate the mean of the N-Best matches | |
1712 thisDist=0.0; | |
1713 for(m=0; m<pointNN; m++) | |
1714 thisDist+=distances[m]; | |
1715 thisDist/=pointNN; | |
1716 | |
1717 // All the seg stuff goes here | |
1718 n=segNN; | |
1719 while(n--){ | |
1720 if(thisDist>=segDistances[n]){ | |
1721 if((n==0 || thisDist<=segDistances[n-1])){ | |
1722 // Copy all values above up the queue | |
1723 for( l=segNN-1 ; l > n ; l--){ | |
1724 segDistances[l]=segDistances[l-1]; | |
1725 segQIndexes[l]=segQIndexes[l-1]; | |
1726 segSIndexes[l]=segSIndexes[l-1]; | |
1727 segIDs[l]=segIDs[l-1]; | |
1728 } | |
1729 segDistances[n]=thisDist; | |
1730 segQIndexes[n]=qIndexes[0]; | |
1731 segSIndexes[n]=sIndexes[0]; | |
1732 successfulSegs++; | |
1733 segIDs[n]=seg; | |
1734 break; | |
1735 } | |
1736 } | |
1737 else | |
1738 break; | |
1739 } | |
1740 } // Duration match | |
1741 | |
1742 // per-seg reset array values | |
1743 for(unsigned k=0; k<pointNN; k++){ | |
1744 distances[k]=0.0; | |
1745 qIndexes[k]=~0; | |
1746 sIndexes[k]=~0; | |
1747 } | |
1748 | |
1749 // Clean up current seg | |
1750 if(D!=NULL){ | |
1751 for(j=0; j<numVectors; j++) | |
1752 delete[] D[j]; | |
1753 } | |
1754 | |
1755 if(DD!=NULL){ | |
1756 for(j=0; j<numVectors; j++) | |
1757 delete[] DD[j]; | |
1758 } | |
1759 } | |
1760 } | |
1761 | |
1762 gettimeofday(&tv2,NULL); | |
1763 if(verbosity>1) | |
1764 cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:" | |
1765 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1766 | |
1767 if(adbQueryResult==0){ | |
1768 if(verbosity>1) | |
1769 cerr<<endl; | |
1770 // Output answer | |
1771 // Loop over nearest neighbours | |
1772 for(k=0; k < min(segNN,successfulSegs); k++) | |
1773 cout << fileTable+segIDs[k]*O2_FILETABLESIZE << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl; | |
1774 } | |
1775 else{ // Process Web Services Query | |
1776 int listLen = min(segNN, processedSegs); | |
1777 adbQueryResult->__sizeRlist=listLen; | |
1778 adbQueryResult->__sizeDist=listLen; | |
1779 adbQueryResult->__sizeQpos=listLen; | |
1780 adbQueryResult->__sizeSpos=listLen; | |
1781 adbQueryResult->Rlist= new char*[listLen]; | |
1782 adbQueryResult->Dist = new double[listLen]; | |
1783 adbQueryResult->Qpos = new int[listLen]; | |
1784 adbQueryResult->Spos = new int[listLen]; | |
1785 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | |
1786 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | |
1787 adbQueryResult->Dist[k]=segDistances[k]; | |
1788 adbQueryResult->Qpos[k]=segQIndexes[k]; | |
1789 adbQueryResult->Spos[k]=segSIndexes[k]; | |
1790 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); | |
1791 } | |
1792 } | |
1793 | |
1794 | |
1795 // Clean up | |
1796 if(segOffsetTable) | |
1797 delete segOffsetTable; | |
1798 if(queryCopy) | |
1799 delete queryCopy; | |
1800 //if(qNorm) | |
1801 //delete qNorm; | |
1802 if(D) | |
1803 delete[] D; | |
1804 if(DD) | |
1805 delete[] DD; | |
1806 if(timesdata) | |
1807 delete timesdata; | |
1808 if(meanDBdur) | |
1809 delete meanDBdur; | |
1810 | |
1811 | |
1812 } | |
1813 | |
1814 void audioDB::normalize(double* X, int dim, int n){ | |
1815 unsigned c = n*dim; | |
1816 double minval,maxval,v,*p; | |
1817 | |
1818 p=X; | |
1819 while(c--){ | |
1820 v=*p++; | |
1821 if(v<minval) | |
1822 minval=v; | |
1823 else if(v>maxval) | |
1824 maxval=v; | |
1825 } | |
1826 | |
1827 normalize(X, dim, n, minval, maxval); | |
1828 | |
1829 } | |
1830 | |
1831 void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){ | |
1832 unsigned c = n*dim; | |
1833 double *p; | |
1834 | |
1835 | |
1836 if(maxval==minval) | |
1837 return; | |
1838 | |
1839 maxval=1.0/(maxval-minval); | |
1840 c=n*dim; | |
1841 p=X; | |
1842 | |
1843 while(c--){ | |
1844 *p=(*p-minval)*maxval; | |
1845 p++; | |
1846 } | |
1847 } | |
1848 | |
1849 // Unit norm block of features | |
1850 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ | |
1851 unsigned d; | |
1852 double L2, oneOverL2, *p; | |
1853 if(verbosity>2) | |
1854 cerr << "norming " << n << " vectors...";cerr.flush(); | |
1855 while(n--){ | |
1856 p=X; | |
1857 L2=0.0; | |
1858 d=dim; | |
1859 while(d--){ | |
1860 L2+=*p**p; | |
1861 p++; | |
1862 } | |
1863 L2=sqrt(L2); | |
1864 if(qNorm) | |
1865 *qNorm++=L2; | |
1866 oneOverL2 = 1.0/L2; | |
1867 d=dim; | |
1868 while(d--){ | |
1869 *X*=oneOverL2; | |
1870 X++; | |
1871 } | |
1872 } | |
1873 if(verbosity>2) | |
1874 cerr << "done..." << endl; | |
1875 } | |
1876 | |
1877 // Unit norm block of features | |
1878 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ | |
1879 unsigned d; | |
1880 double L2, oneOverL2, *p; | |
1881 unsigned nn = n; | |
1882 | |
1883 assert(l2normTable); | |
1884 | |
1885 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) | |
1886 error("Database is already L2 normed", "automatic norm on insert is enabled"); | |
1887 | |
1888 if(verbosity>2) | |
1889 cerr << "norming " << n << " vectors...";cerr.flush(); | |
1890 | |
1891 double* l2buf = new double[n]; | |
1892 double* l2ptr = l2buf; | |
1893 assert(l2buf); | |
1894 assert(X); | |
1895 | |
1896 while(nn--){ | |
1897 p=X; | |
1898 *l2ptr=0.0; | |
1899 d=dim; | |
1900 while(d--){ | |
1901 *l2ptr+=*p**p; | |
1902 p++; | |
1903 } | |
1904 *l2ptr=sqrt(*l2ptr); | |
1905 oneOverL2 = 1.0/(*l2ptr++); | |
1906 d=dim; | |
1907 while(d--){ | |
1908 *X*=oneOverL2; | |
1909 X++; | |
1910 } | |
1911 } | |
1912 unsigned offset; | |
1913 if(append) | |
1914 offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors | |
1915 else | |
1916 offset=0; | |
1917 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); | |
1918 if(l2buf) | |
1919 delete l2buf; | |
1920 if(verbosity>2) | |
1921 cerr << "done..." << endl; | |
1922 } | |
1923 | |
1924 | |
1925 // Start an audioDB server on the host | |
1926 void audioDB::startServer(){ | |
1927 struct soap soap; | |
1928 int m, s; // master and slave sockets | |
1929 soap_init(&soap); | |
1930 m = soap_bind(&soap, NULL, port, 100); | |
1931 if (m < 0) | |
1932 soap_print_fault(&soap, stderr); | |
1933 else | |
1934 { | |
1935 fprintf(stderr, "Socket connection successful: master socket = %d\n", m); | |
1936 for (int i = 1; ; i++) | |
1937 { | |
1938 s = soap_accept(&soap); | |
1939 if (s < 0) | |
1940 { | |
1941 soap_print_fault(&soap, stderr); | |
1942 break; | |
1943 } | |
1944 fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i, | |
1945 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s); | |
1946 if (soap_serve(&soap) != SOAP_OK) // process RPC request | |
1947 soap_print_fault(&soap, stderr); // print error | |
1948 fprintf(stderr, "request served\n"); | |
1949 soap_destroy(&soap); // clean up class instances | |
1950 soap_end(&soap); // clean up everything and close socket | |
1951 } | |
1952 } | |
1953 soap_done(&soap); // close master socket and detach environment | |
1954 } | |
1955 | |
1956 | |
1957 // web services | |
1958 | |
1959 // SERVER SIDE | |
1960 int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){ | |
1961 char* const argv[]={"audioDB",COM_STATUS,dbName}; | |
1962 const unsigned argc = 3; | |
1963 audioDB(argc,argv); | |
1964 adbCreateResult=100; | |
1965 return SOAP_OK; | |
1966 } | |
1967 | |
1968 // Literal translation of command line to web service | |
1969 | |
1970 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ | |
1971 char queryType[256]; | |
1972 for(int k=0; k<256; k++) | |
1973 queryType[k]='\0'; | |
1974 if(qType == O2_FLAG_POINT_QUERY) | |
1975 strncpy(queryType, "point", strlen("point")); | |
1976 else if (qType == O2_FLAG_SEQUENCE_QUERY) | |
1977 strncpy(queryType, "sequence", strlen("sequence")); | |
1978 else if(qType == O2_FLAG_SEG_QUERY) | |
1979 strncpy(queryType,"segment", strlen("segment")); | |
1980 else | |
1981 strncpy(queryType, "", strlen("")); | |
1982 | |
1983 if(pointNN==0) | |
1984 pointNN=10; | |
1985 if(segNN==0) | |
1986 segNN=10; | |
1987 if(seqLen==0) | |
1988 seqLen=16; | |
1989 | |
1990 char qPosStr[256]; | |
1991 sprintf(qPosStr, "%d", qPos); | |
1992 char pointNNStr[256]; | |
1993 sprintf(pointNNStr,"%d",pointNN); | |
1994 char segNNStr[256]; | |
1995 sprintf(segNNStr,"%d",segNN); | |
1996 char seqLenStr[256]; | |
1997 sprintf(seqLenStr,"%d",seqLen); | |
1998 | |
1999 const char* argv[] ={ | |
2000 "./audioDB", | |
2001 COM_QUERY, | |
2002 queryType, // Need to pass a parameter | |
2003 COM_DATABASE, | |
2004 dbName, | |
2005 COM_FEATURES, | |
2006 qKey, | |
2007 COM_KEYLIST, | |
2008 keyList==0?"":keyList, | |
2009 COM_TIMES, | |
2010 timesFileName==0?"":timesFileName, | |
2011 COM_QPOINT, | |
2012 qPosStr, | |
2013 COM_POINTNN, | |
2014 pointNNStr, | |
2015 COM_SEGNN, | |
2016 segNNStr, // Need to pass a parameter | |
2017 COM_SEQLEN, | |
2018 seqLenStr | |
2019 }; | |
2020 | |
2021 const unsigned argc = 19; | |
2022 audioDB(argc, (char* const*)argv, &adbQueryResult); | |
2023 return SOAP_OK; | |
2024 } | |
2025 | |
2026 int main(const unsigned argc, char* const argv[]){ | |
2027 audioDB(argc, argv); | |
2028 } | |
2029 | |
2030 |