annotate audioDB.cpp @ 12:a206d3e91f8b

fixed keyList logic (in sequence match only) to avoid halting just because the database's last key was visited before end of keylist sequence.
author mas01mc
date Thu, 26 Jul 2007 14:08:11 +0000
parents 3d134836ea14
children c633f3819a49
rev   line source
mas01cr@0 1 /* audioDB.cpp
mas01cr@0 2
mas01cr@0 3 audioDB version 1.0
mas01cr@0 4
mas01cr@0 5 A feature vector database management system for content-based retrieval.
mas01cr@0 6
mas01cr@0 7 Usage: audioDB [OPTIONS]...
mas01cr@0 8
mas01cr@0 9 --full-help Print help, including hidden options, and exit
mas01cr@0 10 -V, --version Print version and exit
mas01cr@0 11 -H, --help print help on audioDB usage and exit.
mas01cr@0 12 -v, --verbosity=detail level of detail of operational information.
mas01cr@0 13 (default=`1')
mas01cr@0 14
mas01cr@0 15 Database Setup:
mas01cr@0 16 All database operations require a database argument.
mas01cr@0 17
mas01cr@0 18 Database commands are UPPER CASE. Command options are lower case.
mas01cr@0 19
mas01cr@0 20 -d, --database=filename database file required by Database commands.
mas01cr@0 21 -N, --NEW make a new (initially empty) database.
mas01cr@0 22 -S, --STATUS output database information to stdout.
mas01cr@0 23 -D, --DUMP output all entries: index key size.
mas01cr@0 24 -L, --L2NORM unit norm vectors and norm all future inserts.
mas01cr@0 25
mas01cr@0 26 Database Insertion:
mas01cr@0 27 The following commands insert feature files, with optional keys and
mas01cr@0 28 timestamps.
mas01cr@0 29
mas01cr@0 30 -I, --INSERT add feature vectors to an existing database.
mas01cr@0 31 -U, --UPDATE replace inserted vectors associated with key
mas01cr@0 32 with new input vectors.
mas01cr@0 33 -f, --features=filename binary series of vectors file {int sz:ieee
mas01cr@0 34 double[][sz]:eof}.
mas01cr@0 35 -t, --times=filename list of time points (ascii) for feature vectors.
mas01cr@0 36 -k, --key=identifier unique identifier associated with features.
mas01cr@0 37
mas01cr@0 38 -B, --BATCHINSERT add feature vectors named in a --featureList
mas01cr@0 39 file (with optional keys in a --keyList file)
mas01cr@0 40 to the named database.
mas01cr@0 41 -F, --featureList=filename text file containing list of binary feature
mas01cr@0 42 vector files to process
mas01cr@0 43 -T, --timesList=filename text file containing list of ascii --times for
mas01cr@0 44 each --features file in --featureList.
mas01cr@0 45 -K, --keyList=filename text file containing list of unique identifiers
mas01cr@0 46 associated with --features.
mas01cr@0 47
mas01cr@0 48 Database Search:
mas01cr@0 49 These commands control the retrieval behaviour.
mas01cr@0 50
mas01cr@0 51 -Q, --QUERY=searchtype content-based search on --database using
mas01cr@0 52 --features as a query. Optionally restrict the
mas01cr@0 53 search to those segments identified in a
mas01cr@0 54 --keyList. (possible values="point",
mas01cr@0 55 "segment", "sequence")
mas01cr@0 56 -p, --qpoint=position ordinal position of query start point in
mas01cr@0 57 --features file. (default=`0')
mas01cr@0 58 -e, --exhaustive exhaustive search: iterate through all query
mas01cr@0 59 vectors in search. Overrides --qpoint.
mas01cr@0 60 (default=off)
mas01cr@0 61 -n, --pointnn=numpoints number of point nearest neighbours to use in
mas01cr@0 62 retrieval. (default=`10')
mas01cr@0 63 -R, --radius=DOUBLE radius search, returns all
mas01cr@0 64 points/segments/sequences inside given radius.
mas01cr@0 65 (default=`1.0')
mas01cr@0 66 -x, --expandfactor=DOUBLE time compress/expand factor of result length to
mas01cr@0 67 query length [1.0 .. 100.0]. (default=`1.1')
mas01cr@0 68 -o, --rotate rotate query vectors for rotationally invariant
mas01cr@0 69 search. (default=off)
mas01cr@0 70 -r, --resultlength=length maximum length of the result list.
mas01cr@0 71 (default=`10')
mas01cr@0 72 -l, --sequencelength=length length of sequences for sequence search.
mas01cr@0 73 (default=`16')
mas01cr@0 74 -h, --sequencehop=hop hop size of sequence window for sequence search.
mas01cr@0 75 (default=`1')
mas01cr@0 76
mas01cr@0 77 Web Services:
mas01cr@0 78 These commands enable the database process to establish a connection via the
mas01cr@0 79 internet and operate as separate client and server processes.
mas01cr@0 80
mas01cr@0 81 -s, --SERVER=port run as standalone web service on named port.
mas01cr@0 82 (default=`80011')
mas01cr@0 83 -c, --client=hostname:port run as a client using named host service.
mas01cr@0 84
mas01cr@0 85 Copyright (C) 2007 Michael Casey, Goldsmiths, University of London
mas01cr@0 86
mas01cr@0 87 outputs:
mas01cr@0 88
mas01cr@0 89 key1 distance1 qpos1 spos1
mas01cr@0 90 key2 distance2 qpos2 spos2
mas01cr@0 91 ...
mas01cr@0 92 keyN distanceN qposN sposN
mas01cr@0 93
mas01cr@0 94 */
mas01cr@0 95
mas01cr@0 96 #include "audioDB.h"
mas01cr@0 97
mas01cr@0 98 #define O2_DEBUG
mas01cr@0 99
mas01cr@0 100 void audioDB::error(const char* a, const char* b){
mas01cr@0 101 cerr << a << ":" << b << endl;
mas01cr@0 102 exit(1);
mas01cr@0 103 }
mas01cr@0 104
mas01cr@0 105 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult):
mas01cr@0 106 dim(0),
mas01cr@0 107 dbName(0),
mas01cr@0 108 inFile(0),
mas01cr@0 109 key(0),
mas01cr@0 110 segFile(0),
mas01cr@0 111 segFileName(0),
mas01cr@0 112 timesFile(0),
mas01cr@0 113 timesFileName(0),
mas01cr@0 114 usingTimes(0),
mas01cr@0 115 command(0),
mas01cr@0 116 dbfid(0),
mas01cr@0 117 db(0),
mas01cr@0 118 dbH(0),
mas01cr@0 119 infid(0),
mas01cr@0 120 indata(0),
mas01cr@0 121 queryType(O2_FLAG_POINT_QUERY),
mas01cr@0 122 verbosity(1),
mas01cr@0 123 pointNN(O2_DEFAULT_POINTNN),
mas01cr@0 124 segNN(O2_DEFAULT_SEGNN),
mas01cr@0 125 segTable(0),
mas01cr@0 126 fileTable(0),
mas01cr@0 127 dataBuf(0),
mas01cr@0 128 l2normTable(0),
mas01cr@0 129 timesTable(0),
mas01cr@0 130 qNorm(0),
mas01cr@0 131 sequenceLength(16),
mas01cr@0 132 sequenceHop(1),
mas01cr@0 133 queryPoint(0),
mas01cr@0 134 usingQueryPoint(0),
mas01cr@0 135 isClient(0),
mas01cr@0 136 isServer(0),
mas01cr@0 137 port(0),
mas01mc@11 138 timesTol(0.1){
mas01cr@0 139
mas01cr@0 140 if(processArgs(argc, argv)<0){
mas01cr@0 141 printf("No command found.\n");
mas01cr@0 142 cmdline_parser_print_version ();
mas01cr@0 143 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 144 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 145 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 146 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 147 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 148 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 149 exit(1);
mas01cr@0 150 }
mas01cr@0 151
mas01cr@0 152 if(O2_ACTION(COM_SERVER))
mas01cr@0 153 startServer();
mas01cr@0 154
mas01cr@0 155 else if(O2_ACTION(COM_CREATE))
mas01cr@0 156 create(dbName);
mas01cr@0 157
mas01cr@0 158 else if(O2_ACTION(COM_INSERT))
mas01cr@0 159 insert(dbName, inFile);
mas01cr@0 160
mas01cr@0 161 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 162 batchinsert(dbName, inFile);
mas01cr@0 163
mas01cr@0 164 else if(O2_ACTION(COM_QUERY))
mas01cr@0 165 if(isClient)
mas01cr@0 166 ws_query(dbName, inFile, (char*)hostport);
mas01cr@0 167 else
mas01cr@0 168 query(dbName, inFile, adbQueryResult);
mas01cr@0 169
mas01cr@0 170 else if(O2_ACTION(COM_STATUS))
mas01cr@0 171 if(isClient)
mas01cr@0 172 ws_status(dbName,(char*)hostport);
mas01cr@0 173 else
mas01cr@0 174 status(dbName);
mas01cr@0 175
mas01cr@0 176 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 177 l2norm(dbName);
mas01cr@0 178
mas01cr@0 179 else if(O2_ACTION(COM_DUMP))
mas01cr@0 180 dump(dbName);
mas01cr@0 181
mas01cr@0 182 else
mas01cr@0 183 error("Unrecognized command",command);
mas01cr@0 184 }
mas01cr@0 185
mas01cr@0 186 audioDB::~audioDB(){
mas01cr@0 187 // Clean up
mas01cr@0 188 if(indata)
mas01cr@0 189 munmap(indata,statbuf.st_size);
mas01cr@0 190 if(db)
mas01cr@0 191 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 192 if(dbfid>0)
mas01cr@0 193 close(dbfid);
mas01cr@0 194 if(infid>0)
mas01cr@0 195 close(infid);
mas01cr@0 196 if(dbH)
mas01cr@0 197 delete dbH;
mas01cr@0 198 }
mas01cr@0 199
mas01cr@0 200 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 201
mas01cr@0 202 if(argc<2){
mas01cr@0 203 cmdline_parser_print_version ();
mas01cr@0 204 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 205 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 206 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 207 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 208 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 209 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 210 exit(0);
mas01cr@0 211 }
mas01cr@0 212
mas01cr@0 213 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@0 214 exit(1) ;
mas01cr@0 215
mas01cr@0 216 if(args_info.help_given){
mas01cr@0 217 cmdline_parser_print_help();
mas01cr@0 218 exit(0);
mas01cr@0 219 }
mas01cr@0 220
mas01cr@0 221 if(args_info.verbosity_given){
mas01cr@0 222 verbosity=args_info.verbosity_arg;
mas01cr@0 223 if(verbosity<0 || verbosity>10){
mas01cr@0 224 cerr << "Warning: verbosity out of range, setting to 1" << endl;
mas01cr@0 225 verbosity=1;
mas01cr@0 226 }
mas01cr@0 227 }
mas01cr@0 228
mas01cr@0 229 if(args_info.SERVER_given){
mas01cr@0 230 command=COM_SERVER;
mas01cr@0 231 port=args_info.SERVER_arg;
mas01cr@0 232 if(port<100 || port > 100000)
mas01cr@0 233 error("port out of range");
mas01cr@0 234 isServer=1;
mas01cr@0 235 return 0;
mas01cr@0 236 }
mas01cr@0 237
mas01cr@0 238 // No return on client command, find database command
mas01cr@0 239 if(args_info.client_given){
mas01cr@0 240 command=COM_CLIENT;
mas01cr@0 241 hostport=args_info.client_arg;
mas01cr@0 242 isClient=1;
mas01cr@0 243 }
mas01cr@0 244
mas01cr@0 245 if(args_info.NEW_given){
mas01cr@0 246 command=COM_CREATE;
mas01cr@0 247 dbName=args_info.database_arg;
mas01cr@0 248 return 0;
mas01cr@0 249 }
mas01cr@0 250
mas01cr@0 251 if(args_info.STATUS_given){
mas01cr@0 252 command=COM_STATUS;
mas01cr@0 253 dbName=args_info.database_arg;
mas01cr@0 254 return 0;
mas01cr@0 255 }
mas01cr@0 256
mas01cr@0 257 if(args_info.DUMP_given){
mas01cr@0 258 command=COM_DUMP;
mas01cr@0 259 dbName=args_info.database_arg;
mas01cr@0 260 return 0;
mas01cr@0 261 }
mas01cr@0 262
mas01cr@0 263 if(args_info.L2NORM_given){
mas01cr@0 264 command=COM_L2NORM;
mas01cr@0 265 dbName=args_info.database_arg;
mas01cr@0 266 return 0;
mas01cr@0 267 }
mas01cr@0 268
mas01cr@0 269 if(args_info.INSERT_given){
mas01cr@0 270 command=COM_INSERT;
mas01cr@0 271 dbName=args_info.database_arg;
mas01cr@0 272 inFile=args_info.features_arg;
mas01cr@0 273 if(args_info.key_given)
mas01cr@0 274 key=args_info.key_arg;
mas01cr@0 275 if(args_info.times_given){
mas01cr@0 276 timesFileName=args_info.times_arg;
mas01cr@0 277 if(strlen(timesFileName)>0){
mas01cr@0 278 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 279 error("Could not open times file for reading", timesFileName);
mas01cr@0 280 usingTimes=1;
mas01cr@0 281 }
mas01cr@0 282 }
mas01cr@0 283 return 0;
mas01cr@0 284 }
mas01mc@10 285
mas01cr@0 286 if(args_info.BATCHINSERT_given){
mas01cr@0 287 command=COM_BATCHINSERT;
mas01cr@0 288 dbName=args_info.database_arg;
mas01cr@0 289 inFile=args_info.featureList_arg;
mas01cr@0 290 if(args_info.keyList_given)
mas01cr@0 291 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 292
mas01cr@0 293 /* TO DO: REPLACE WITH
mas01cr@0 294 if(args_info.keyList_given){
mas01cr@0 295 segFileName=args_info.keyList_arg;
mas01cr@0 296 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in)))
mas01cr@0 297 error("Could not open keyList file for reading",segFileName);
mas01cr@0 298 }
mas01cr@0 299 AND UPDATE BATCHINSERT()
mas01cr@0 300 */
mas01cr@0 301
mas01cr@0 302 if(args_info.timesList_given){
mas01cr@0 303 timesFileName=args_info.timesList_arg;
mas01cr@0 304 if(strlen(timesFileName)>0){
mas01cr@0 305 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 306 error("Could not open timesList file for reading", timesFileName);
mas01cr@0 307 usingTimes=1;
mas01cr@0 308 }
mas01cr@0 309 }
mas01cr@0 310 return 0;
mas01cr@0 311 }
mas01cr@0 312
mas01cr@0 313 // Query command and arguments
mas01cr@0 314 if(args_info.QUERY_given){
mas01cr@0 315 command=COM_QUERY;
mas01cr@0 316 dbName=args_info.database_arg;
mas01cr@0 317 inFile=args_info.features_arg;
mas01cr@0 318
mas01cr@0 319 if(args_info.keyList_given){
mas01cr@0 320 segFileName=args_info.keyList_arg;
mas01cr@0 321 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in)))
mas01cr@0 322 error("Could not open keyList file for reading",segFileName);
mas01cr@0 323 }
mas01cr@0 324
mas01cr@0 325 if(args_info.times_given){
mas01cr@0 326 timesFileName=args_info.times_arg;
mas01cr@0 327 if(strlen(timesFileName)>0){
mas01cr@0 328 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 329 error("Could not open times file for reading", timesFileName);
mas01cr@0 330 usingTimes=1;
mas01cr@0 331 }
mas01cr@0 332 }
mas01cr@0 333
mas01cr@0 334 // query type
mas01cr@0 335 if(strncmp(args_info.QUERY_arg, "segment", MAXSTR)==0)
mas01cr@0 336 queryType=O2_FLAG_SEG_QUERY;
mas01cr@0 337 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@0 338 queryType=O2_FLAG_POINT_QUERY;
mas01cr@0 339 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@0 340 queryType=O2_FLAG_SEQUENCE_QUERY;
mas01cr@0 341 else
mas01cr@0 342 error("unsupported query type",args_info.QUERY_arg);
mas01cr@0 343
mas01cr@0 344 if(!args_info.exhaustive_flag){
mas01cr@0 345 queryPoint = args_info.qpoint_arg;
mas01cr@0 346 usingQueryPoint=1;
mas01cr@0 347 if(queryPoint<0 || queryPoint >10000)
mas01cr@0 348 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@0 349 }
mas01cr@0 350
mas01cr@0 351
mas01cr@0 352 pointNN=args_info.pointnn_arg;
mas01cr@0 353 if(pointNN<1 || pointNN >1000)
mas01cr@0 354 error("pointNN out of range: 1 <= pointNN <= 1000");
mas01cr@0 355
mas01cr@0 356
mas01cr@0 357
mas01cr@0 358 segNN=args_info.resultlength_arg;
mas01cr@0 359 if(segNN<1 || segNN >1000)
mas01cr@0 360 error("resultlength out of range: 1 <= resultlength <= 1000");
mas01cr@0 361
mas01cr@0 362
mas01cr@0 363 sequenceLength=args_info.sequencelength_arg;
mas01cr@0 364 if(sequenceLength<1 || sequenceLength >1000)
mas01cr@0 365 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@0 366
mas01cr@0 367 sequenceHop=args_info.sequencehop_arg;
mas01cr@0 368 if(sequenceHop<1 || sequenceHop >1000)
mas01cr@0 369 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01cr@0 370
mas01cr@0 371 return 0;
mas01cr@0 372 }
mas01cr@0 373 return -1; // no command found
mas01cr@0 374 }
mas01cr@0 375
mas01cr@0 376 /* Make a new database
mas01cr@0 377
mas01cr@0 378 The database consists of:
mas01cr@0 379
mas01cr@0 380 header
mas01cr@0 381 ---------------------------------------------------------------------------------
mas01cr@0 382 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
mas01cr@0 383 ---------------------------------------------------------------------------------
mas01cr@0 384
mas01cr@0 385
mas01cr@0 386 keyTable : list of keys of segments
mas01cr@0 387 --------------------------------------------------------------------------
mas01cr@0 388 | key 256 bytes |
mas01cr@0 389 --------------------------------------------------------------------------
mas01cr@0 390 O2_MAXFILES*02_FILENAMELENGTH
mas01cr@0 391
mas01cr@0 392 segTable : Maps implicit feature index to a feature vector matrix
mas01cr@0 393 --------------------------------------------------------------------------
mas01cr@0 394 | numVectors (4 bytes) |
mas01cr@0 395 --------------------------------------------------------------------------
mas01cr@0 396 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(INT)
mas01cr@0 397
mas01cr@0 398 featureTable
mas01cr@0 399 --------------------------------------------------------------------------
mas01cr@0 400 | v1 v2 v3 ... vd (double) |
mas01cr@0 401 --------------------------------------------------------------------------
mas01cr@0 402 O2_MAXFILES * 02_MEANNUMFEATURES * DIM * sizeof(DOUBLE)
mas01cr@0 403
mas01cr@0 404 timesTable
mas01cr@0 405 --------------------------------------------------------------------------
mas01cr@0 406 | timestamp (double) |
mas01cr@0 407 --------------------------------------------------------------------------
mas01cr@0 408 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)
mas01cr@0 409
mas01cr@0 410 l2normTable
mas01cr@0 411 --------------------------------------------------------------------------
mas01cr@0 412 | nm (double) |
mas01cr@0 413 --------------------------------------------------------------------------
mas01cr@0 414 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)
mas01cr@0 415
mas01cr@0 416 */
mas01cr@0 417
mas01cr@0 418 void audioDB::create(const char* dbName){
mas01cr@8 419 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01cr@8 420 error("Can't open database file", dbName);
mas01cr@0 421
mas01cr@0 422 // go to the location corresponding to the last byte
mas01cr@0 423 if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
mas01cr@0 424 error("lseek error in db file");
mas01cr@0 425
mas01cr@0 426 // write a dummy byte at the last location
mas01cr@0 427 if (write (dbfid, "", 1) != 1)
mas01cr@0 428 error("write error");
mas01cr@0 429
mas01cr@0 430 // mmap the output file
mas01cr@0 431 if(verbosity)
mas01cr@0 432 cerr << "header size:" << O2_HEADERSIZE << endl;
mas01cr@0 433 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@0 434 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@0 435 error("mmap error for creating database");
mas01cr@0 436
mas01cr@0 437 dbH = new dbTableHeaderT();
mas01cr@0 438 assert(dbH);
mas01cr@0 439
mas01cr@0 440 // Initialize header
mas01cr@0 441 dbH->magic=O2_MAGIC;
mas01cr@0 442 dbH->numFiles=0;
mas01cr@0 443 dbH->length=0;
mas01cr@0 444 dbH->dim=0;
mas01cr@0 445 dbH->flags=0; //O2_FLAG_L2NORM;
mas01cr@0 446
mas01cr@0 447 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 448 if(verbosity)
mas01cr@0 449 cerr << COM_CREATE << " " << dbName << endl;
mas01cr@0 450
mas01cr@0 451 }
mas01cr@0 452
mas01cr@0 453
mas01cr@0 454 void audioDB::drop(){
mas01cr@0 455
mas01cr@0 456
mas01cr@0 457 }
mas01cr@0 458
mas01cr@0 459 // initTables - memory map files passed as arguments
mas01cr@0 460 // Precondition: database has already been created
mas01cr@0 461 void audioDB::initTables(const char* dbName, const char* inFile=0){
mas01cr@0 462 if ((dbfid = open (dbName, O_RDWR)) < 0)
mas01cr@0 463 error("Can't open database file:", dbName);
mas01cr@0 464
mas01cr@0 465 // open the input file
mas01cr@0 466 if (inFile && (infid = open (inFile, O_RDONLY)) < 0)
mas01cr@9 467 error("can't open input file for reading", inFile);
mas01cr@0 468
mas01cr@0 469 // find size of input file
mas01cr@0 470 if (inFile && fstat (infid,&statbuf) < 0)
mas01cr@0 471 error("fstat error finding size of input");
mas01cr@0 472
mas01cr@0 473 // Get the database header info
mas01cr@0 474 dbH = new dbTableHeaderT();
mas01cr@0 475 assert(dbH);
mas01cr@0 476
mas01cr@0 477 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
mas01cr@0 478 error("error reading db header");
mas01cr@0 479
mas01cr@0 480 fileTableOffset = O2_HEADERSIZE;
mas01cr@0 481 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
mas01cr@0 482 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES;
mas01cr@0 483 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 484 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 485
mas01cr@0 486 if(dbH->magic!=O2_MAGIC){
mas01cr@0 487 cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl;
mas01cr@0 488 error("database file has incorrect header",dbName);
mas01cr@0 489 }
mas01cr@0 490
mas01cr@0 491 if(inFile)
mas01cr@0 492 if(dbH->dim==0 && dbH->length==0) // empty database
mas01cr@0 493 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
mas01cr@0 494 else {
mas01cr@0 495 unsigned test;
mas01cr@0 496 read(infid,&test,sizeof(unsigned));
mas01cr@0 497 if(dbH->dim!=test){
mas01cr@0 498 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
mas01cr@0 499 error("feature dimensions do not match database table dimensions");
mas01cr@0 500 }
mas01cr@0 501 }
mas01cr@0 502
mas01cr@0 503 // mmap the input file
mas01cr@0 504 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 505 == (caddr_t) -1)
mas01cr@0 506 error("mmap error for input");
mas01cr@0 507
mas01cr@0 508 // mmap the database file
mas01cr@0 509 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@0 510 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@0 511 error("mmap error for creating database");
mas01cr@0 512
mas01cr@0 513 // Make some handy tables with correct types
mas01cr@0 514 fileTable= (char*)(db+fileTableOffset);
mas01cr@0 515 segTable = (unsigned*)(db+segTableOffset);
mas01cr@0 516 dataBuf = (double*)(db+dataoffset);
mas01cr@0 517 l2normTable = (double*)(db+l2normTableOffset);
mas01cr@0 518 timesTable = (double*)(db+timesTableOffset);
mas01cr@0 519
mas01cr@0 520 }
mas01cr@0 521
mas01cr@0 522 void audioDB::insert(const char* dbName, const char* inFile){
mas01cr@0 523
mas01cr@0 524 initTables(dbName, inFile);
mas01cr@0 525
mas01cr@0 526 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 527 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 528
mas01cr@0 529 // Check that there is room for at least 1 more file
mas01cr@0 530 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
mas01cr@0 531 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 532
mas01cr@0 533 if(!key)
mas01cr@0 534 key=inFile;
mas01cr@0 535 // Linear scan of filenames check for pre-existing feature
mas01cr@0 536 unsigned alreadyInserted=0;
mas01cr@0 537 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 538 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
mas01cr@0 539 alreadyInserted=1;
mas01cr@0 540 break;
mas01cr@0 541 }
mas01cr@0 542
mas01cr@0 543 if(alreadyInserted){
mas01cr@0 544 if(verbosity)
mas01cr@0 545 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
mas01cr@0 546 return;
mas01cr@0 547 }
mas01cr@0 548
mas01cr@0 549 // Make a segment index table of features to file indexes
mas01cr@0 550 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 551 if(!numVectors){
mas01cr@0 552 if(verbosity)
mas01cr@0 553 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
mas01cr@0 554 // CLEAN UP
mas01cr@0 555 munmap(indata,statbuf.st_size);
mas01cr@0 556 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 557 close(infid);
mas01cr@0 558 return;
mas01cr@0 559 }
mas01cr@0 560
mas01cr@0 561 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@0 562
mas01cr@0 563 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 564
mas01cr@0 565 // Check times status and insert times from file
mas01cr@0 566 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 567 double* timesdata=timesTable+timesoffset;
mas01cr@0 568 assert(timesdata+numVectors<l2normTable);
mas01cr@0 569 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 570
mas01cr@0 571 // Increment file count
mas01cr@0 572 dbH->numFiles++;
mas01cr@0 573
mas01cr@0 574 // Update Header information
mas01cr@0 575 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 576
mas01cr@0 577 // Copy the header back to the database
mas01cr@0 578 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 579
mas01cr@0 580 // Update segment to file index map
mas01cr@0 581 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01cr@0 582 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 583
mas01cr@0 584 // Update the feature database
mas01cr@0 585 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 586
mas01cr@0 587 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 588 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 589 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 590
mas01cr@0 591 // Report status
mas01cr@0 592 status(dbName);
mas01cr@0 593 if(verbosity)
mas01cr@0 594 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
mas01cr@0 595 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
mas01cr@0 596
mas01cr@0 597 // CLEAN UP
mas01cr@0 598 munmap(indata,statbuf.st_size);
mas01cr@0 599 close(infid);
mas01cr@0 600 }
mas01cr@0 601
mas01cr@0 602 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
mas01cr@0 603 unsigned numtimes=0;
mas01cr@0 604 if(usingTimes){
mas01cr@0 605 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
mas01cr@0 606 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@0 607 else if(!(dbH->flags&O2_FLAG_TIMES)){
mas01cr@0 608 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
mas01cr@0 609 usingTimes=0;
mas01cr@0 610 }
mas01cr@0 611
mas01cr@0 612 if(!timesFile->is_open()){
mas01cr@0 613 if(dbH->flags & O2_FLAG_TIMES){
mas01cr@0 614 munmap(indata,statbuf.st_size);
mas01cr@0 615 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 616 error("problem opening times file on timestamped database",timesFileName);
mas01cr@0 617 }
mas01cr@0 618 else{
mas01cr@0 619 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
mas01cr@0 620 usingTimes=0;
mas01cr@0 621 }
mas01cr@0 622 }
mas01cr@0 623
mas01cr@0 624 // Process time file
mas01cr@0 625 if(usingTimes){
mas01cr@0 626 do{
mas01cr@0 627 *timesFile>>*timesdata++;
mas01cr@0 628 if(timesFile->eof())
mas01cr@0 629 break;
mas01cr@0 630 numtimes++;
mas01cr@0 631 }while(!timesFile->eof() && numtimes<numVectors);
mas01cr@0 632 if(!timesFile->eof()){
mas01cr@0 633 double dummy;
mas01cr@0 634 do{
mas01cr@0 635 *timesFile>>dummy;
mas01cr@0 636 if(timesFile->eof())
mas01cr@0 637 break;
mas01cr@0 638 numtimes++;
mas01cr@0 639 }while(!timesFile->eof());
mas01cr@0 640 }
mas01cr@0 641 if(numtimes<numVectors || numtimes>numVectors+2){
mas01cr@0 642 munmap(indata,statbuf.st_size);
mas01cr@0 643 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 644 close(infid);
mas01cr@0 645 cerr << "expected " << numVectors << " found " << numtimes << endl;
mas01cr@0 646 error("Times file is incorrect length for features file",inFile);
mas01cr@0 647 }
mas01cr@0 648 if(verbosity>2)
mas01cr@0 649 cerr << "numtimes: " << numtimes << endl;
mas01cr@0 650 }
mas01cr@0 651 }
mas01cr@0 652 }
mas01cr@0 653
mas01cr@0 654 void audioDB::batchinsert(const char* dbName, const char* inFile){
mas01cr@0 655
mas01cr@0 656 if ((dbfid = open (dbName, O_RDWR)) < 0)
mas01cr@0 657 error("Can't open database file:", dbName);
mas01cr@0 658
mas01cr@0 659 if(!key)
mas01cr@0 660 key=inFile;
mas01cr@0 661 ifstream *filesIn = 0;
mas01cr@0 662 ifstream *keysIn = 0;
mas01cr@0 663 ifstream* thisTimesFile = 0;
mas01cr@0 664
mas01cr@0 665 if(!(filesIn = new ifstream(inFile)))
mas01cr@0 666 error("Could not open batch in file", inFile);
mas01cr@0 667 if(key && key!=inFile)
mas01cr@0 668 if(!(keysIn = new ifstream(key)))
mas01cr@0 669 error("Could not open batch key file",key);
mas01cr@0 670
mas01cr@0 671 // Get the database header info
mas01cr@0 672 dbH = new dbTableHeaderT();
mas01cr@0 673 assert(dbH);
mas01cr@0 674
mas01cr@0 675 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
mas01cr@0 676 error("error reading db header");
mas01cr@0 677
mas01cr@0 678 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 679 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 680
mas01cr@0 681 fileTableOffset = O2_HEADERSIZE;
mas01cr@0 682 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
mas01cr@0 683 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES;
mas01cr@0 684 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 685 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 686
mas01cr@0 687 if(dbH->magic!=O2_MAGIC){
mas01cr@0 688 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
mas01cr@0 689 error("database file has incorrect header",dbName);
mas01cr@0 690 }
mas01cr@0 691
mas01cr@0 692
mas01cr@0 693 unsigned totalVectors=0;
mas01cr@0 694 char *thisKey = new char[MAXSTR];
mas01cr@0 695 char *thisFile = new char[MAXSTR];
mas01cr@0 696 char *thisTimesFileName = new char[MAXSTR];
mas01cr@0 697
mas01cr@0 698 do{
mas01cr@0 699 filesIn->getline(thisFile,MAXSTR);
mas01cr@0 700 if(key && key!=inFile)
mas01cr@0 701 keysIn->getline(thisKey,MAXSTR);
mas01cr@0 702 else
mas01cr@0 703 thisKey = thisFile;
mas01cr@0 704 if(usingTimes)
mas01cr@0 705 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@0 706
mas01cr@0 707 if(filesIn->eof())
mas01cr@0 708 break;
mas01cr@0 709
mas01cr@0 710 // open the input file
mas01cr@0 711 if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0)
mas01cr@0 712 error("can't open feature file for reading", thisFile);
mas01cr@0 713
mas01cr@0 714 // find size of input file
mas01cr@0 715 if (thisFile && fstat (infid,&statbuf) < 0)
mas01cr@0 716 error("fstat error finding size of input");
mas01cr@0 717
mas01mc@11 718 // mmap the database file
mas01mc@11 719 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01mc@11 720 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01mc@11 721 error("mmap error for creating database");
mas01mc@11 722
mas01mc@11 723 // Make some handy tables with correct types
mas01mc@11 724 fileTable= (char*)(db+fileTableOffset);
mas01mc@11 725 segTable = (unsigned*)(db+segTableOffset);
mas01mc@11 726 dataBuf = (double*)(db+dataoffset);
mas01mc@11 727 l2normTable = (double*)(db+l2normTableOffset);
mas01mc@11 728 timesTable = (double*)(db+timesTableOffset);
mas01mc@11 729
mas01cr@0 730 // Check that there is room for at least 1 more file
mas01cr@0 731 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
mas01cr@0 732 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 733
mas01cr@0 734 if(thisFile)
mas01cr@0 735 if(dbH->dim==0 && dbH->length==0) // empty database
mas01cr@0 736 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
mas01cr@0 737 else {
mas01cr@0 738 unsigned test;
mas01cr@0 739 read(infid,&test,sizeof(unsigned));
mas01cr@0 740 if(dbH->dim!=test){
mas01cr@0 741 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
mas01cr@0 742 error("feature dimensions do not match database table dimensions");
mas01cr@0 743 }
mas01cr@0 744 }
mas01cr@0 745
mas01cr@0 746 // mmap the input file
mas01cr@0 747 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 748 == (caddr_t) -1)
mas01cr@0 749 error("mmap error for input");
mas01cr@0 750
mas01cr@0 751
mas01cr@0 752 // Linear scan of filenames check for pre-existing feature
mas01cr@0 753 unsigned alreadyInserted=0;
mas01cr@0 754
mas01cr@0 755 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 756 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
mas01cr@0 757 alreadyInserted=1;
mas01cr@0 758 break;
mas01cr@0 759 }
mas01cr@0 760
mas01cr@0 761 if(alreadyInserted){
mas01cr@0 762 if(verbosity)
mas01cr@0 763 cerr << "Warning: key already exists in database:" << thisKey << endl;
mas01cr@0 764 }
mas01cr@0 765 else{
mas01cr@0 766
mas01cr@0 767 // Make a segment index table of features to file indexes
mas01cr@0 768 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 769 if(!numVectors){
mas01cr@0 770 if(verbosity)
mas01cr@0 771 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
mas01cr@0 772 }
mas01cr@0 773 else{
mas01cr@0 774 if(usingTimes){
mas01cr@0 775 if(timesFile->eof())
mas01cr@0 776 error("not enough timestamp files in timesList");
mas01cr@0 777 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
mas01cr@0 778 if(!thisTimesFile->is_open())
mas01cr@0 779 error("Cannot open timestamp file",thisTimesFileName);
mas01cr@0 780 unsigned insertoffset=dbH->length;
mas01cr@0 781 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 782 double* timesdata=timesTable+timesoffset;
mas01cr@0 783 assert(timesdata+numVectors<l2normTable);
mas01cr@0 784 insertTimeStamps(numVectors,thisTimesFile,timesdata);
mas01cr@0 785 if(thisTimesFile)
mas01cr@0 786 delete thisTimesFile;
mas01cr@0 787 }
mas01cr@0 788
mas01cr@0 789 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@0 790
mas01cr@0 791 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 792
mas01cr@0 793 // Increment file count
mas01cr@0 794 dbH->numFiles++;
mas01cr@0 795
mas01cr@0 796 // Update Header information
mas01cr@0 797 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 798 // Copy the header back to the database
mas01cr@0 799 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 800
mas01cr@0 801 // Update segment to file index map
mas01cr@0 802 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01cr@0 803 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 804
mas01cr@0 805 // Update the feature database
mas01cr@0 806 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 807
mas01cr@0 808 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 809 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 810 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 811
mas01cr@0 812 totalVectors+=numVectors;
mas01cr@0 813 }
mas01cr@0 814 }
mas01cr@0 815 // CLEAN UP
mas01cr@0 816 munmap(indata,statbuf.st_size);
mas01cr@0 817 close(infid);
mas01mc@11 818 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 819 }while(!filesIn->eof());
mas01mc@12 820
mas01mc@12 821 // mmap the database file
mas01mc@12 822 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01mc@12 823 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01mc@12 824 error("mmap error for creating database");
mas01cr@0 825
mas01cr@0 826 if(verbosity)
mas01cr@0 827 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
mas01cr@0 828 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
mas01cr@0 829
mas01cr@0 830 // Report status
mas01cr@0 831 status(dbName);
mas01mc@12 832
mas01mc@12 833 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 834 }
mas01cr@0 835
mas01cr@0 836 void audioDB::ws_status(const char*dbName, char* hostport){
mas01cr@0 837 struct soap soap;
mas01cr@0 838 int adbStatusResult;
mas01cr@0 839
mas01cr@0 840 // Query an existing adb database
mas01cr@0 841 soap_init(&soap);
mas01cr@0 842 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK)
mas01cr@0 843 std::cout << "result = " << adbStatusResult << std::endl;
mas01cr@0 844 else
mas01cr@0 845 soap_print_fault(&soap,stderr);
mas01cr@0 846
mas01cr@0 847 soap_destroy(&soap);
mas01cr@0 848 soap_end(&soap);
mas01cr@0 849 soap_done(&soap);
mas01cr@0 850 }
mas01cr@0 851
mas01cr@0 852 void audioDB::ws_query(const char*dbName, const char *segKey, const char* hostport){
mas01cr@0 853 struct soap soap;
mas01cr@0 854 adb__queryResult adbQueryResult;
mas01cr@0 855
mas01cr@0 856 soap_init(&soap);
mas01cr@0 857 if(soap_call_adb__query(&soap,hostport,NULL,
mas01cr@0 858 (char*)dbName,(char*)segKey,(char*)segFileName,(char*)timesFileName,
mas01cr@0 859 queryType, queryPoint, pointNN, segNN, sequenceLength, adbQueryResult)==SOAP_OK){
mas01cr@0 860 //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl;
mas01cr@0 861 for(int i=0; i<adbQueryResult.__sizeRlist; i++)
mas01cr@0 862 std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i]
mas01cr@0 863 << " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl;
mas01cr@0 864 }
mas01cr@0 865 else
mas01cr@0 866 soap_print_fault(&soap,stderr);
mas01cr@0 867
mas01cr@0 868 soap_destroy(&soap);
mas01cr@0 869 soap_end(&soap);
mas01cr@0 870 soap_done(&soap);
mas01cr@0 871
mas01cr@0 872 }
mas01cr@0 873
mas01cr@0 874
mas01cr@0 875 void audioDB::status(const char* dbName){
mas01cr@0 876 if(!dbH)
mas01cr@0 877 initTables(dbName, 0);
mas01cr@0 878
mas01cr@0 879 // Update Header information
mas01cr@0 880 cout << "num files:" << dbH->numFiles << endl;
mas01cr@0 881 cout << "data dim:" << dbH->dim <<endl;
mas01cr@0 882 if(dbH->dim>0){
mas01cr@0 883 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
mas01cr@0 884 cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
mas01cr@0 885 }
mas01cr@0 886 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
mas01cr@0 887 cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" <<
mas01cr@0 888 (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
mas01cr@0 889 cout << "flags:" << dbH->flags << endl;
mas01cr@0 890
mas01cr@0 891 unsigned dudCount=0;
mas01cr@0 892 unsigned nullCount=0;
mas01cr@0 893 for(unsigned k=0; k<dbH->numFiles; k++){
mas01cr@0 894 if(segTable[k]<sequenceLength){
mas01cr@0 895 dudCount++;
mas01cr@0 896 if(!segTable[k])
mas01cr@0 897 nullCount++;
mas01cr@0 898 }
mas01cr@0 899 }
mas01cr@0 900 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
mas01cr@0 901 }
mas01cr@0 902
mas01cr@0 903
mas01cr@0 904 void audioDB::dump(const char* dbName){
mas01cr@0 905 if(!dbH)
mas01cr@0 906 initTables(dbName,0);
mas01cr@0 907
mas01cr@0 908 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 909 cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl;
mas01cr@0 910
mas01cr@0 911 status(dbName);
mas01cr@0 912 }
mas01cr@0 913
mas01cr@0 914 void audioDB::l2norm(const char* dbName){
mas01cr@0 915 initTables(dbName,0);
mas01cr@0 916 if(dbH->length>0){
mas01cr@0 917 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@0 918 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 919 }
mas01cr@0 920 // Update database flags
mas01cr@0 921 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 922 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 923 }
mas01cr@0 924
mas01cr@0 925
mas01cr@0 926
mas01cr@0 927 void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 928 switch(queryType){
mas01cr@0 929 case O2_FLAG_POINT_QUERY:
mas01cr@0 930 pointQuery(dbName, inFile, adbQueryResult);
mas01cr@0 931 break;
mas01cr@0 932 case O2_FLAG_SEQUENCE_QUERY:
mas01cr@0 933 segSequenceQuery(dbName, inFile, adbQueryResult);
mas01cr@0 934 break;
mas01cr@0 935 case O2_FLAG_SEG_QUERY:
mas01cr@0 936 segPointQuery(dbName, inFile, adbQueryResult);
mas01cr@0 937 break;
mas01cr@0 938 default:
mas01cr@0 939 error("unrecognized queryType in query()");
mas01cr@0 940
mas01cr@0 941 }
mas01cr@0 942 }
mas01cr@0 943
mas01cr@0 944 //return ordinal position of key in keyTable
mas01cr@0 945 unsigned audioDB::getKeyPos(char* key){
mas01cr@0 946 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 947 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@0 948 return k;
mas01cr@0 949 error("Key not found",key);
mas01cr@0 950 return O2_ERR_KEYNOTFOUND;
mas01cr@0 951 }
mas01cr@0 952
mas01cr@0 953 // Basic point query engine
mas01cr@0 954 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 955
mas01cr@0 956 initTables(dbName, inFile);
mas01cr@0 957
mas01cr@0 958 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 959 // we use stdout in this stub version
mas01cr@0 960 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 961
mas01cr@0 962 double* query = (double*)(indata+sizeof(int));
mas01cr@0 963 double* data = dataBuf;
mas01cr@0 964 double* queryCopy = 0;
mas01cr@0 965
mas01cr@0 966 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 967 // Make a copy of the query
mas01cr@0 968 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 969 qNorm = new double[numVectors];
mas01cr@0 970 assert(queryCopy&&qNorm);
mas01cr@0 971 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 972 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 973 query = queryCopy;
mas01cr@0 974 }
mas01cr@0 975
mas01cr@0 976 // Make temporary dynamic memory for results
mas01cr@0 977 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 978 double distances[pointNN];
mas01cr@0 979 unsigned qIndexes[pointNN];
mas01cr@0 980 unsigned sIndexes[pointNN];
mas01cr@0 981 for(unsigned k=0; k<pointNN; k++){
mas01cr@0 982 distances[k]=0.0;
mas01cr@0 983 qIndexes[k]=~0;
mas01cr@0 984 sIndexes[k]=~0;
mas01cr@0 985 }
mas01cr@0 986
mas01cr@0 987 unsigned j=numVectors;
mas01cr@0 988 unsigned k,l,n;
mas01cr@0 989 double thisDist;
mas01cr@0 990
mas01cr@0 991 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
mas01cr@0 992 double meanQdur = 0;
mas01cr@0 993 double* timesdata = 0;
mas01cr@0 994 double* dbdurs = 0;
mas01cr@0 995
mas01cr@0 996 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 997 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 998 usingTimes=0;
mas01cr@0 999 }
mas01cr@0 1000
mas01cr@0 1001 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1002 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1003
mas01cr@0 1004 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1005 timesdata = new double[numVectors];
mas01cr@0 1006 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1007 // Calculate durations of points
mas01cr@0 1008 for(k=0; k<numVectors-1; k++){
mas01cr@0 1009 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1010 meanQdur+=timesdata[k];
mas01cr@0 1011 }
mas01cr@0 1012 meanQdur/=k;
mas01cr@0 1013 // Individual exhaustive timepoint durations
mas01cr@0 1014 dbdurs = new double[totalVecs];
mas01cr@0 1015 for(k=0; k<totalVecs-1; k++)
mas01cr@0 1016 dbdurs[k]=timesTable[k+1]-timesTable[k];
mas01cr@0 1017 j--; // decrement vector counter by one
mas01cr@0 1018 }
mas01cr@0 1019
mas01cr@0 1020 if(usingQueryPoint)
mas01cr@0 1021 if(queryPoint>numVectors-1)
mas01cr@0 1022 error("queryPoint > numVectors in query");
mas01cr@0 1023 else{
mas01cr@0 1024 if(verbosity>1)
mas01cr@0 1025 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1026 query=query+queryPoint*dbH->dim;
mas01cr@0 1027 numVectors=queryPoint+1;
mas01cr@0 1028 j=1;
mas01cr@0 1029 }
mas01cr@0 1030
mas01cr@0 1031 gettimeofday(&tv1, NULL);
mas01cr@0 1032 while(j--){ // query
mas01cr@0 1033 data=dataBuf;
mas01cr@0 1034 k=totalVecs; // number of database vectors
mas01cr@0 1035 while(k--){ // database
mas01cr@0 1036 thisDist=0;
mas01cr@0 1037 l=dbH->dim;
mas01cr@0 1038 double* q=query;
mas01cr@0 1039 while(l--)
mas01cr@0 1040 thisDist+=*q++**data++;
mas01cr@0 1041 if(!usingTimes ||
mas01cr@0 1042 (usingTimes
mas01cr@0 1043 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
mas01cr@0 1044 n=pointNN;
mas01cr@0 1045 while(n--){
mas01cr@0 1046 if(thisDist>=distances[n]){
mas01cr@0 1047 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1048 // Copy all values above up the queue
mas01cr@0 1049 for( l=pointNN-1 ; l >= n+1 ; l--){
mas01cr@0 1050 distances[l]=distances[l-1];
mas01cr@0 1051 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1052 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1053 }
mas01cr@0 1054 distances[n]=thisDist;
mas01cr@0 1055 qIndexes[n]=numVectors-j-1;
mas01cr@0 1056 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
mas01cr@0 1057 break;
mas01cr@0 1058 }
mas01cr@0 1059 }
mas01cr@0 1060 else
mas01cr@0 1061 break;
mas01cr@0 1062 }
mas01cr@0 1063 }
mas01cr@0 1064 }
mas01cr@0 1065 // Move query pointer to next query point
mas01cr@0 1066 query+=dbH->dim;
mas01cr@0 1067 }
mas01cr@0 1068
mas01cr@0 1069 gettimeofday(&tv2, NULL);
mas01cr@0 1070 if(verbosity>1)
mas01cr@0 1071 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@0 1072
mas01cr@0 1073 if(adbQueryResult==0){
mas01cr@0 1074 // Output answer
mas01cr@0 1075 // Loop over nearest neighbours
mas01cr@0 1076 for(k=0; k < pointNN; k++){
mas01cr@0 1077 // Scan for key
mas01cr@0 1078 unsigned cumSeg=0;
mas01cr@0 1079 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@0 1080 cumSeg+=segTable[l];
mas01cr@0 1081 if(sIndexes[k]<cumSeg){
mas01cr@0 1082 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
mas01cr@0 1083 << sIndexes[k]+segTable[l]-cumSeg << endl;
mas01cr@0 1084 break;
mas01cr@0 1085 }
mas01cr@0 1086 }
mas01cr@0 1087 }
mas01cr@0 1088 }
mas01cr@0 1089 else{ // Process Web Services Query
mas01cr@0 1090 int listLen = pointNN;
mas01cr@0 1091 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1092 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1093 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1094 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1095 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1096 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1097 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1098 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1099 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1100 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@0 1101 adbQueryResult->Dist[k]=distances[k];
mas01cr@0 1102 adbQueryResult->Qpos[k]=qIndexes[k];
mas01cr@0 1103 unsigned cumSeg=0;
mas01cr@0 1104 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@0 1105 cumSeg+=segTable[l];
mas01cr@0 1106 if(sIndexes[k]<cumSeg){
mas01cr@0 1107 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
mas01cr@0 1108 break;
mas01cr@0 1109 }
mas01cr@0 1110 }
mas01cr@0 1111 adbQueryResult->Spos[k]=sIndexes[k]+segTable[l]-cumSeg;
mas01cr@0 1112 }
mas01cr@0 1113 }
mas01cr@0 1114
mas01cr@0 1115 // Clean up
mas01cr@0 1116 if(queryCopy)
mas01cr@0 1117 delete queryCopy;
mas01cr@0 1118 if(qNorm)
mas01cr@0 1119 delete qNorm;
mas01cr@0 1120 if(timesdata)
mas01cr@0 1121 delete timesdata;
mas01cr@0 1122 if(dbdurs)
mas01cr@0 1123 delete dbdurs;
mas01cr@0 1124 }
mas01cr@0 1125
mas01cr@0 1126 void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 1127
mas01cr@0 1128 }
mas01cr@0 1129
mas01cr@0 1130 // segPointQuery
mas01cr@0 1131 // return the segNN closest segs to the query seg
mas01cr@0 1132 // uses average of pointNN points per seg
mas01cr@0 1133 void audioDB::segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 1134 initTables(dbName, inFile);
mas01cr@0 1135
mas01cr@0 1136 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1137 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1138 unsigned numSegs = dbH->numFiles;
mas01cr@0 1139
mas01cr@0 1140 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1141 double* data = dataBuf;
mas01cr@0 1142 double* queryCopy = 0;
mas01cr@0 1143
mas01cr@0 1144 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1145 // Make a copy of the query
mas01cr@0 1146 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1147 qNorm = new double[numVectors];
mas01cr@0 1148 assert(queryCopy&&qNorm);
mas01cr@0 1149 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1150 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1151 query = queryCopy;
mas01cr@0 1152 }
mas01cr@0 1153
mas01cr@0 1154 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 1155 assert(segNN>0 && segNN<=O2_MAXNN);
mas01cr@0 1156
mas01cr@0 1157 // Make temporary dynamic memory for results
mas01cr@0 1158 double segDistances[segNN];
mas01cr@0 1159 unsigned segIDs[segNN];
mas01cr@0 1160 unsigned segQIndexes[segNN];
mas01cr@0 1161 unsigned segSIndexes[segNN];
mas01cr@0 1162
mas01cr@0 1163 double distances[pointNN];
mas01cr@0 1164 unsigned qIndexes[pointNN];
mas01cr@0 1165 unsigned sIndexes[pointNN];
mas01cr@0 1166
mas01cr@0 1167 unsigned j=numVectors; // number of query points
mas01cr@0 1168 unsigned k,l,n, seg, segOffset=0, processedSegs=0;
mas01cr@0 1169 double thisDist;
mas01cr@0 1170
mas01cr@0 1171 for(k=0; k<pointNN; k++){
mas01cr@0 1172 distances[k]=0.0;
mas01cr@0 1173 qIndexes[k]=~0;
mas01cr@0 1174 sIndexes[k]=~0;
mas01cr@0 1175 }
mas01cr@0 1176
mas01cr@0 1177 for(k=0; k<segNN; k++){
mas01cr@0 1178 segDistances[k]=0.0;
mas01cr@0 1179 segQIndexes[k]=~0;
mas01cr@0 1180 segSIndexes[k]=~0;
mas01cr@0 1181 segIDs[k]=~0;
mas01cr@0 1182 }
mas01cr@0 1183
mas01cr@0 1184 double meanQdur = 0;
mas01cr@0 1185 double* timesdata = 0;
mas01cr@0 1186 double* meanDBdur = 0;
mas01cr@0 1187
mas01cr@0 1188 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1189 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1190 usingTimes=0;
mas01cr@0 1191 }
mas01cr@0 1192
mas01cr@0 1193 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1194 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1195
mas01cr@0 1196 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1197 timesdata = new double[numVectors];
mas01cr@0 1198 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1199 // Calculate durations of points
mas01cr@0 1200 for(k=0; k<numVectors-1; k++){
mas01cr@0 1201 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1202 meanQdur+=timesdata[k];
mas01cr@0 1203 }
mas01cr@0 1204 meanQdur/=k;
mas01cr@0 1205 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1206 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1207 meanDBdur[k]=0.0;
mas01cr@0 1208 for(j=0; j<segTable[k]-1 ; j++)
mas01cr@0 1209 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1210 meanDBdur[k]/=j;
mas01cr@0 1211 }
mas01cr@0 1212 }
mas01cr@0 1213
mas01cr@0 1214 if(usingQueryPoint)
mas01cr@0 1215 if(queryPoint>numVectors-1)
mas01cr@0 1216 error("queryPoint > numVectors in query");
mas01cr@0 1217 else{
mas01cr@0 1218 if(verbosity>1)
mas01cr@0 1219 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1220 query=query+queryPoint*dbH->dim;
mas01cr@0 1221 numVectors=queryPoint+1;
mas01cr@0 1222 }
mas01cr@0 1223
mas01cr@0 1224 // build segment offset table
mas01cr@0 1225 unsigned *segOffsetTable = new unsigned[dbH->numFiles];
mas01cr@0 1226 unsigned cumSeg=0;
mas01cr@0 1227 unsigned segIndexOffset;
mas01cr@0 1228 for(k=0; k<dbH->numFiles;k++){
mas01cr@0 1229 segOffsetTable[k]=cumSeg;
mas01cr@0 1230 cumSeg+=segTable[k]*dbH->dim;
mas01cr@0 1231 }
mas01cr@0 1232
mas01cr@0 1233 char nextKey[MAXSTR];
mas01cr@0 1234
mas01cr@0 1235 gettimeofday(&tv1, NULL);
mas01cr@0 1236
mas01cr@0 1237 for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){
mas01cr@0 1238 if(segFile){
mas01cr@0 1239 if(!segFile->eof()){
mas01cr@0 1240 //*segFile>>seg;
mas01cr@0 1241 segFile->getline(nextKey,MAXSTR);
mas01mc@12 1242 if(verbosity>3){
mas01mc@12 1243 cerr << nextKey << endl;
mas01mc@12 1244 cerr.flush();
mas01mc@12 1245 }
mas01cr@0 1246 seg=getKeyPos(nextKey);
mas01cr@0 1247 }
mas01cr@0 1248 else
mas01cr@0 1249 break;
mas01cr@0 1250 }
mas01cr@0 1251 segOffset=segOffsetTable[seg]; // numDoubles offset
mas01cr@0 1252 segIndexOffset=segOffset/dbH->dim; // numVectors offset
mas01cr@0 1253 if(verbosity>7)
mas01cr@0 1254 cerr << seg << "." << segOffset/(dbH->dim) << "." << segTable[seg] << " | ";cerr.flush();
mas01cr@0 1255
mas01cr@0 1256 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 1257 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
mas01cr@0 1258 else
mas01cr@0 1259 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
mas01cr@0 1260 if(usingQueryPoint)
mas01cr@0 1261 j=1;
mas01cr@0 1262 else
mas01cr@0 1263 j=numVectors;
mas01cr@0 1264 while(j--){
mas01cr@0 1265 k=segTable[seg]; // number of vectors in seg
mas01cr@0 1266 data=dataBuf+segOffset; // data for seg
mas01cr@0 1267 while(k--){
mas01cr@0 1268 thisDist=0;
mas01cr@0 1269 l=dbH->dim;
mas01cr@0 1270 double* q=query;
mas01cr@0 1271 while(l--)
mas01cr@0 1272 thisDist+=*q++**data++;
mas01cr@0 1273 if(!usingTimes ||
mas01cr@0 1274 (usingTimes
mas01cr@0 1275 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1276 n=pointNN;
mas01cr@0 1277 while(n--){
mas01cr@0 1278 if(thisDist>=distances[n]){
mas01cr@0 1279 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1280 // Copy all values above up the queue
mas01cr@0 1281 for( l=pointNN-1 ; l > n ; l--){
mas01cr@0 1282 distances[l]=distances[l-1];
mas01cr@0 1283 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1284 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1285 }
mas01cr@0 1286 distances[n]=thisDist;
mas01cr@0 1287 qIndexes[n]=numVectors-j-1;
mas01cr@0 1288 sIndexes[n]=segTable[seg]-k-1;
mas01cr@0 1289 break;
mas01cr@0 1290 }
mas01cr@0 1291 }
mas01cr@0 1292 else
mas01cr@0 1293 break;
mas01cr@0 1294 }
mas01cr@0 1295 }
mas01cr@0 1296 } // seg
mas01cr@0 1297 // Move query pointer to next query point
mas01cr@0 1298 query+=dbH->dim;
mas01cr@0 1299 } // query
mas01cr@0 1300 // Take the average of this seg's distance
mas01cr@0 1301 // Test the seg distances
mas01cr@0 1302 thisDist=0;
mas01cr@0 1303 n=pointNN;
mas01cr@0 1304 while(n--)
mas01cr@0 1305 thisDist+=distances[pointNN-n-1];
mas01cr@0 1306 thisDist/=pointNN;
mas01cr@0 1307 n=segNN;
mas01cr@0 1308 while(n--){
mas01cr@0 1309 if(thisDist>=segDistances[n]){
mas01cr@0 1310 if((n==0 || thisDist<=segDistances[n-1])){
mas01cr@0 1311 // Copy all values above up the queue
mas01cr@0 1312 for( l=pointNN-1 ; l > n ; l--){
mas01cr@0 1313 segDistances[l]=segDistances[l-1];
mas01cr@0 1314 segQIndexes[l]=segQIndexes[l-1];
mas01cr@0 1315 segSIndexes[l]=segSIndexes[l-1];
mas01cr@0 1316 segIDs[l]=segIDs[l-1];
mas01cr@0 1317 }
mas01cr@0 1318 segDistances[n]=thisDist;
mas01cr@0 1319 segQIndexes[n]=qIndexes[0];
mas01cr@0 1320 segSIndexes[n]=sIndexes[0];
mas01cr@0 1321 segIDs[n]=seg;
mas01cr@0 1322 break;
mas01cr@0 1323 }
mas01cr@0 1324 }
mas01cr@0 1325 else
mas01cr@0 1326 break;
mas01cr@0 1327 }
mas01cr@0 1328 for(unsigned k=0; k<pointNN; k++){
mas01cr@0 1329 distances[k]=0.0;
mas01cr@0 1330 qIndexes[k]=~0;
mas01cr@0 1331 sIndexes[k]=~0;
mas01cr@0 1332 }
mas01cr@0 1333 } // segs
mas01cr@0 1334 gettimeofday(&tv2, NULL);
mas01cr@0 1335
mas01cr@0 1336 if(verbosity>1)
mas01cr@0 1337 cerr << endl << "processed segs :" << processedSegs
mas01cr@0 1338 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@0 1339
mas01cr@0 1340 if(adbQueryResult==0){
mas01cr@0 1341 if(verbosity>1)
mas01cr@0 1342 cerr<<endl;
mas01cr@0 1343 // Output answer
mas01cr@0 1344 // Loop over nearest neighbours
mas01cr@0 1345 for(k=0; k < min(segNN,processedSegs); k++)
mas01cr@0 1346 cout << fileTable+segIDs[k]*O2_FILETABLESIZE
mas01cr@0 1347 << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl;
mas01cr@0 1348 }
mas01cr@0 1349 else{ // Process Web Services Query
mas01cr@0 1350 int listLen = min(segNN, processedSegs);
mas01cr@0 1351 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1352 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1353 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1354 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1355 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1356 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1357 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1358 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1359 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1360 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@0 1361 adbQueryResult->Dist[k]=segDistances[k];
mas01cr@0 1362 adbQueryResult->Qpos[k]=segQIndexes[k];
mas01cr@0 1363 adbQueryResult->Spos[k]=segSIndexes[k];
mas01cr@0 1364 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1365 }
mas01cr@0 1366 }
mas01cr@0 1367
mas01cr@0 1368
mas01cr@0 1369 // Clean up
mas01cr@0 1370 if(segOffsetTable)
mas01cr@0 1371 delete segOffsetTable;
mas01cr@0 1372 if(queryCopy)
mas01cr@0 1373 delete queryCopy;
mas01cr@0 1374 if(qNorm)
mas01cr@0 1375 delete qNorm;
mas01cr@0 1376 if(timesdata)
mas01cr@0 1377 delete timesdata;
mas01cr@0 1378 if(meanDBdur)
mas01cr@0 1379 delete meanDBdur;
mas01cr@0 1380
mas01cr@0 1381 }
mas01cr@0 1382
mas01cr@0 1383 void audioDB::deleteDB(const char* dbName, const char* inFile){
mas01cr@0 1384
mas01cr@0 1385 }
mas01cr@0 1386
mas01cr@0 1387 // NBest matched filter distance between query and target segs
mas01cr@0 1388 // efficient implementation
mas01cr@0 1389 // outputs average of N minimum matched filter distances
mas01cr@0 1390 void audioDB::segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 1391
mas01cr@0 1392 initTables(dbName, inFile);
mas01cr@0 1393
mas01cr@0 1394 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1395 // we use stdout in this stub version
mas01cr@0 1396 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1397 unsigned numSegs = dbH->numFiles;
mas01cr@0 1398
mas01cr@0 1399 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1400 double* data = dataBuf;
mas01cr@0 1401 double* queryCopy = 0;
mas01cr@0 1402
mas01cr@0 1403 double qMeanL2;
mas01cr@0 1404 double* sMeanL2;
mas01cr@0 1405
mas01cr@0 1406 unsigned USE_THRESH=0;
mas01cr@0 1407 double SILENCE_THRESH=0;
mas01cr@0 1408 double DIFF_THRESH=0;
mas01cr@0 1409
mas01cr@0 1410 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 1411 error("Database must be L2 normed for sequence query","use -l2norm");
mas01cr@0 1412
mas01cr@0 1413 if(verbosity>1)
mas01cr@0 1414 cerr << "performing norms ... "; cerr.flush();
mas01cr@0 1415 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@0 1416 // Make a copy of the query
mas01cr@0 1417 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1418 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1419 qNorm = new double[numVectors];
mas01cr@0 1420 sNorm = new double[dbVectors];
mas01cr@0 1421 sMeanL2=new double[dbH->numFiles];
mas01cr@0 1422 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
mas01cr@0 1423 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1424 query = queryCopy;
mas01cr@0 1425 // Make norm measurements relative to sequenceLength
mas01cr@0 1426 unsigned w = sequenceLength-1;
mas01cr@0 1427 unsigned i,j;
mas01cr@0 1428 double* ps;
mas01cr@0 1429 double tmp1,tmp2;
mas01cr@0 1430 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@0 1431 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@0 1432 double* snPtr = sNorm;
mas01cr@0 1433 for(i=0; i<dbH->numFiles; i++){
mas01cr@0 1434 if(segTable[i]>sequenceLength){
mas01cr@0 1435 tmp1=*snPtr;
mas01cr@0 1436 j=1;
mas01cr@0 1437 w=sequenceLength-1;
mas01cr@0 1438 while(w--)
mas01cr@0 1439 *snPtr+=snPtr[j++];
mas01cr@0 1440 ps = snPtr+1;
mas01cr@0 1441 w=segTable[i]-sequenceLength; // +1 - 1
mas01cr@0 1442 while(w--){
mas01cr@0 1443 tmp2=*ps;
mas01cr@0 1444 *ps=*(ps-1)-tmp1+*(ps+sequenceLength);
mas01cr@0 1445 tmp1=tmp2;
mas01cr@0 1446 ps++;
mas01cr@0 1447 }
mas01cr@0 1448 }
mas01cr@0 1449 snPtr+=segTable[i];
mas01cr@0 1450 }
mas01cr@0 1451
mas01cr@0 1452 double* pn = sMeanL2;
mas01cr@0 1453 w=dbH->numFiles;
mas01cr@0 1454 while(w--)
mas01cr@0 1455 *pn++=0.0;
mas01cr@0 1456 ps=sNorm;
mas01cr@0 1457 unsigned processedSegs=0;
mas01cr@0 1458 for(i=0; i<dbH->numFiles; i++){
mas01cr@0 1459 if(segTable[i]>sequenceLength-1){
mas01cr@0 1460 w = segTable[i]-sequenceLength+1;
mas01cr@0 1461 pn = sMeanL2+i;
mas01cr@0 1462 while(w--)
mas01cr@0 1463 *pn+=*ps++;
mas01cr@0 1464 *pn/=segTable[i]-sequenceLength+1;
mas01cr@0 1465 SILENCE_THRESH+=*pn;
mas01cr@0 1466 processedSegs++;
mas01cr@0 1467 }
mas01mc@12 1468 ps = sNorm + segTable[i];
mas01cr@0 1469 }
mas01cr@0 1470 if(verbosity>1)
mas01cr@0 1471 cerr << "processedSegs: " << processedSegs << endl;
mas01cr@0 1472 SILENCE_THRESH/=processedSegs;
mas01cr@0 1473 USE_THRESH=1; // Turn thresholding on
mas01cr@0 1474 DIFF_THRESH=SILENCE_THRESH/=2; // 50% of the mean shingle power
mas01cr@0 1475 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE
mas01cr@0 1476
mas01cr@0 1477 w=sequenceLength-1;
mas01cr@0 1478 i=1;
mas01cr@0 1479 tmp1=*qNorm;
mas01cr@0 1480 while(w--)
mas01cr@0 1481 *qNorm+=qNorm[i++];
mas01cr@0 1482 ps = qNorm+1;
mas01cr@0 1483 qMeanL2 = *qNorm;
mas01cr@0 1484 w=numVectors-sequenceLength;
mas01cr@0 1485 while(w--){
mas01cr@0 1486 tmp2=*ps;
mas01cr@0 1487 *ps=*(ps-1)-tmp1+*(ps+sequenceLength);
mas01cr@0 1488 tmp1=tmp2;
mas01cr@0 1489 qMeanL2+=*ps;
mas01cr@0 1490 *ps++;
mas01cr@0 1491 }
mas01cr@0 1492 qMeanL2 /= numVectors-sequenceLength+1;
mas01cr@0 1493 if(verbosity>1)
mas01cr@0 1494 cerr << "done." << endl;
mas01cr@0 1495
mas01cr@0 1496
mas01cr@0 1497 if(verbosity>1)
mas01cr@0 1498 cerr << "matching segs..." << endl;
mas01cr@0 1499
mas01cr@0 1500 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 1501 assert(segNN>0 && segNN<=O2_MAXNN);
mas01cr@0 1502
mas01cr@0 1503 // Make temporary dynamic memory for results
mas01cr@0 1504 double segDistances[segNN];
mas01cr@0 1505 unsigned segIDs[segNN];
mas01cr@0 1506 unsigned segQIndexes[segNN];
mas01cr@0 1507 unsigned segSIndexes[segNN];
mas01cr@0 1508
mas01cr@0 1509 double distances[pointNN];
mas01cr@0 1510 unsigned qIndexes[pointNN];
mas01cr@0 1511 unsigned sIndexes[pointNN];
mas01cr@0 1512
mas01cr@0 1513
mas01cr@0 1514 unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@0 1515 double thisDist;
mas01cr@0 1516 double oneOverWL=1.0/wL;
mas01cr@0 1517
mas01cr@0 1518 for(k=0; k<pointNN; k++){
mas01cr@0 1519 distances[k]=0.0;
mas01cr@0 1520 qIndexes[k]=~0;
mas01cr@0 1521 sIndexes[k]=~0;
mas01cr@0 1522 }
mas01cr@0 1523
mas01cr@0 1524 for(k=0; k<segNN; k++){
mas01cr@0 1525 segDistances[k]=0.0;
mas01cr@0 1526 segQIndexes[k]=~0;
mas01cr@0 1527 segSIndexes[k]=~0;
mas01cr@0 1528 segIDs[k]=~0;
mas01cr@0 1529 }
mas01cr@0 1530
mas01cr@0 1531 // Timestamp and durations processing
mas01cr@0 1532 double meanQdur = 0;
mas01cr@0 1533 double* timesdata = 0;
mas01cr@0 1534 double* meanDBdur = 0;
mas01cr@0 1535
mas01cr@0 1536 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1537 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1538 usingTimes=0;
mas01cr@0 1539 }
mas01cr@0 1540
mas01cr@0 1541 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1542 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1543
mas01cr@0 1544 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1545 timesdata = new double[numVectors];
mas01cr@0 1546 assert(timesdata);
mas01cr@0 1547 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1548 // Calculate durations of points
mas01cr@0 1549 for(k=0; k<numVectors-1; k++){
mas01cr@0 1550 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1551 meanQdur+=timesdata[k];
mas01cr@0 1552 }
mas01cr@0 1553 meanQdur/=k;
mas01cr@0 1554 if(verbosity>1)
mas01cr@0 1555 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@0 1556 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1557 assert(meanDBdur);
mas01cr@0 1558 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1559 meanDBdur[k]=0.0;
mas01cr@0 1560 for(j=0; j<segTable[k]-1 ; j++)
mas01cr@0 1561 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1562 meanDBdur[k]/=j;
mas01cr@0 1563 }
mas01cr@0 1564 }
mas01cr@0 1565
mas01cr@0 1566 if(usingQueryPoint)
mas01cr@0 1567 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@0 1568 error("queryPoint > numVectors-wL+1 in query");
mas01cr@0 1569 else{
mas01cr@0 1570 if(verbosity>1)
mas01cr@0 1571 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1572 query=query+queryPoint*dbH->dim;
mas01cr@0 1573 qNorm=qNorm+queryPoint;
mas01cr@0 1574 numVectors=wL;
mas01cr@0 1575 }
mas01cr@0 1576
mas01cr@0 1577 double ** D = 0; // Cross-correlation between query and target
mas01cr@0 1578 double ** DD = 0; // Matched filter distance
mas01cr@0 1579
mas01cr@0 1580 D = new double*[numVectors];
mas01cr@0 1581 assert(D);
mas01cr@0 1582 DD = new double*[numVectors];
mas01cr@0 1583 assert(DD);
mas01cr@0 1584
mas01cr@0 1585 gettimeofday(&tv1, NULL);
mas01cr@0 1586 processedSegs=0;
mas01cr@0 1587 unsigned successfulSegs=0;
mas01cr@0 1588
mas01cr@0 1589 double* qp;
mas01cr@0 1590 double* sp;
mas01cr@0 1591 double* dp;
mas01cr@0 1592 double diffL2;
mas01cr@0 1593
mas01cr@0 1594 // build segment offset table
mas01cr@0 1595 unsigned *segOffsetTable = new unsigned[dbH->numFiles];
mas01cr@0 1596 unsigned cumSeg=0;
mas01cr@0 1597 unsigned segIndexOffset;
mas01cr@0 1598 for(k=0; k<dbH->numFiles;k++){
mas01cr@0 1599 segOffsetTable[k]=cumSeg;
mas01cr@0 1600 cumSeg+=segTable[k]*dbH->dim;
mas01cr@0 1601 }
mas01cr@0 1602
mas01cr@0 1603 char nextKey [MAXSTR];
mas01mc@12 1604 for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){
mas01cr@0 1605
mas01cr@0 1606 // get segID from file if using a control file
mas01cr@0 1607 if(segFile){
mas01cr@0 1608 if(!segFile->eof()){
mas01cr@0 1609 segFile->getline(nextKey,MAXSTR);
mas01cr@0 1610 seg=getKeyPos(nextKey);
mas01cr@0 1611 }
mas01cr@0 1612 else
mas01cr@0 1613 break;
mas01cr@0 1614 }
mas01mc@12 1615
mas01cr@0 1616 segOffset=segOffsetTable[seg]; // numDoubles offset
mas01cr@0 1617 segIndexOffset=segOffset/dbH->dim; // numVectors offset
mas01cr@0 1618
mas01cr@0 1619 if(sequenceLength<segTable[seg]){ // test for short sequences
mas01cr@0 1620
mas01cr@0 1621 if(verbosity>7)
mas01cr@0 1622 cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush();
mas01cr@0 1623
mas01cr@0 1624 // Cross-correlation matrix
mas01cr@0 1625 for(j=0; j<numVectors;j++){
mas01cr@0 1626 D[j]=new double[segTable[seg]];
mas01cr@0 1627 assert(D[j]);
mas01cr@0 1628
mas01cr@0 1629 }
mas01cr@0 1630
mas01cr@0 1631 // Matched filter matrix
mas01cr@0 1632 for(j=0; j<numVectors;j++){
mas01cr@0 1633 DD[j]=new double[segTable[seg]];
mas01cr@0 1634 assert(DD[j]);
mas01cr@0 1635 }
mas01cr@0 1636
mas01cr@0 1637 // Cross Correlation
mas01cr@0 1638 for(j=0; j<numVectors; j++)
mas01cr@0 1639 for(k=0; k<segTable[seg]; k++){
mas01cr@0 1640 qp=query+j*dbH->dim;
mas01cr@0 1641 sp=dataBuf+segOffset+k*dbH->dim;
mas01cr@0 1642 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@0 1643 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@0 1644 *dp=0.0; // initialize correlation cell
mas01cr@0 1645 l=dbH->dim; // size of vectors
mas01cr@0 1646 while(l--)
mas01cr@0 1647 *dp+=*qp++**sp++;
mas01cr@0 1648 }
mas01cr@0 1649
mas01cr@0 1650 // Matched Filter
mas01cr@0 1651 // HOP SIZE == 1
mas01cr@0 1652 double* spd;
mas01cr@0 1653 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@0 1654 for(w=0; w<wL; w++)
mas01cr@0 1655 for(j=0; j<numVectors-w; j++){
mas01cr@0 1656 sp=DD[j];
mas01cr@0 1657 spd=D[j+w]+w;
mas01cr@0 1658 k=segTable[seg]-w;
mas01cr@0 1659 while(k--)
mas01cr@0 1660 *sp+++=*spd++;
mas01cr@0 1661 }
mas01cr@0 1662 }
mas01cr@0 1663 else{ // HOP_SIZE != 1
mas01cr@0 1664 for(w=0; w<wL; w++)
mas01cr@0 1665 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@0 1666 sp=DD[j];
mas01cr@0 1667 spd=D[j+w]+w;
mas01cr@0 1668 for(k=0; k<segTable[seg]-w; k+=HOP_SIZE){
mas01cr@0 1669 *sp+=*spd;
mas01cr@0 1670 sp+=HOP_SIZE;
mas01cr@0 1671 spd+=HOP_SIZE;
mas01cr@0 1672 }
mas01cr@0 1673 }
mas01cr@0 1674 }
mas01cr@0 1675
mas01mc@12 1676 if(verbosity>3 && usingTimes){
mas01cr@0 1677 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl;
mas01cr@0 1678 cerr.flush();
mas01cr@0 1679 }
mas01cr@0 1680
mas01cr@0 1681 if(!usingTimes ||
mas01cr@0 1682 (usingTimes
mas01cr@0 1683 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1684
mas01mc@12 1685 if(verbosity>3 && usingTimes){
mas01cr@0 1686 cerr << "within duration tolerance." << endl;
mas01cr@0 1687 cerr.flush();
mas01cr@0 1688 }
mas01cr@0 1689
mas01cr@0 1690 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@0 1691 for(j=0;j<numVectors-wL+1;j+=HOP_SIZE)
mas01cr@0 1692 for(k=0;k<segTable[seg]-wL+1;k+=HOP_SIZE){
mas01cr@0 1693
mas01cr@0 1694 diffL2 = fabs(qNorm[j] - sNorm[k]);
mas01cr@0 1695 // Power test
mas01cr@0 1696 if(!USE_THRESH ||
mas01cr@0 1697 // Threshold on mean L2 of Q and S sequences
mas01cr@0 1698 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH &&
mas01cr@0 1699 // Are both query and target windows above mean energy?
mas01mc@12 1700 (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] && diffL2 < DIFF_THRESH )))
mas01cr@0 1701 thisDist=DD[j][k]*oneOverWL;
mas01cr@0 1702 else
mas01cr@0 1703 thisDist=0.0;
mas01cr@0 1704
mas01cr@0 1705 // NBest match algorithm
mas01cr@0 1706 for(m=0; m<pointNN; m++){
mas01cr@0 1707 if(thisDist>=distances[m]){
mas01cr@0 1708 // Shuffle distances up the list
mas01cr@0 1709 for(l=pointNN-1; l>m; l--){
mas01cr@0 1710 distances[l]=distances[l-1];
mas01cr@0 1711 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1712 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1713 }
mas01cr@0 1714 distances[m]=thisDist;
mas01cr@0 1715 if(usingQueryPoint)
mas01cr@0 1716 qIndexes[m]=queryPoint;
mas01cr@0 1717 else
mas01cr@0 1718 qIndexes[m]=j;
mas01cr@0 1719 sIndexes[m]=k;
mas01cr@0 1720 break;
mas01cr@0 1721 }
mas01cr@0 1722 }
mas01cr@0 1723 }
mas01cr@0 1724 // Calculate the mean of the N-Best matches
mas01cr@0 1725 thisDist=0.0;
mas01cr@0 1726 for(m=0; m<pointNN; m++)
mas01cr@0 1727 thisDist+=distances[m];
mas01cr@0 1728 thisDist/=pointNN;
mas01cr@0 1729
mas01mc@12 1730 // Let's see the distances then...
mas01mc@12 1731 if(verbosity>3)
mas01mc@12 1732 cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl;
mas01mc@12 1733
mas01cr@0 1734 // All the seg stuff goes here
mas01cr@0 1735 n=segNN;
mas01cr@0 1736 while(n--){
mas01cr@0 1737 if(thisDist>=segDistances[n]){
mas01cr@0 1738 if((n==0 || thisDist<=segDistances[n-1])){
mas01cr@0 1739 // Copy all values above up the queue
mas01cr@0 1740 for( l=segNN-1 ; l > n ; l--){
mas01cr@0 1741 segDistances[l]=segDistances[l-1];
mas01cr@0 1742 segQIndexes[l]=segQIndexes[l-1];
mas01cr@0 1743 segSIndexes[l]=segSIndexes[l-1];
mas01cr@0 1744 segIDs[l]=segIDs[l-1];
mas01cr@0 1745 }
mas01cr@0 1746 segDistances[n]=thisDist;
mas01cr@0 1747 segQIndexes[n]=qIndexes[0];
mas01cr@0 1748 segSIndexes[n]=sIndexes[0];
mas01cr@0 1749 successfulSegs++;
mas01cr@0 1750 segIDs[n]=seg;
mas01cr@0 1751 break;
mas01cr@0 1752 }
mas01cr@0 1753 }
mas01cr@0 1754 else
mas01cr@0 1755 break;
mas01cr@0 1756 }
mas01cr@0 1757 } // Duration match
mas01cr@0 1758
mas01cr@0 1759 // per-seg reset array values
mas01cr@0 1760 for(unsigned k=0; k<pointNN; k++){
mas01cr@0 1761 distances[k]=0.0;
mas01cr@0 1762 qIndexes[k]=~0;
mas01cr@0 1763 sIndexes[k]=~0;
mas01cr@0 1764 }
mas01cr@0 1765
mas01cr@0 1766 // Clean up current seg
mas01cr@0 1767 if(D!=NULL){
mas01cr@0 1768 for(j=0; j<numVectors; j++)
mas01cr@0 1769 delete[] D[j];
mas01cr@0 1770 }
mas01cr@0 1771
mas01cr@0 1772 if(DD!=NULL){
mas01cr@0 1773 for(j=0; j<numVectors; j++)
mas01cr@0 1774 delete[] DD[j];
mas01cr@0 1775 }
mas01cr@0 1776 }
mas01cr@0 1777 }
mas01cr@0 1778
mas01cr@0 1779 gettimeofday(&tv2,NULL);
mas01cr@0 1780 if(verbosity>1)
mas01cr@0 1781 cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:"
mas01cr@0 1782 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@0 1783
mas01cr@0 1784 if(adbQueryResult==0){
mas01cr@0 1785 if(verbosity>1)
mas01cr@0 1786 cerr<<endl;
mas01cr@0 1787 // Output answer
mas01cr@0 1788 // Loop over nearest neighbours
mas01cr@0 1789 for(k=0; k < min(segNN,successfulSegs); k++)
mas01cr@0 1790 cout << fileTable+segIDs[k]*O2_FILETABLESIZE << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl;
mas01cr@0 1791 }
mas01cr@0 1792 else{ // Process Web Services Query
mas01cr@0 1793 int listLen = min(segNN, processedSegs);
mas01cr@0 1794 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1795 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1796 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1797 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1798 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1799 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1800 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1801 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1802 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1803 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@0 1804 adbQueryResult->Dist[k]=segDistances[k];
mas01cr@0 1805 adbQueryResult->Qpos[k]=segQIndexes[k];
mas01cr@0 1806 adbQueryResult->Spos[k]=segSIndexes[k];
mas01cr@0 1807 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1808 }
mas01cr@0 1809 }
mas01cr@0 1810
mas01cr@0 1811
mas01cr@0 1812 // Clean up
mas01cr@0 1813 if(segOffsetTable)
mas01cr@0 1814 delete segOffsetTable;
mas01cr@0 1815 if(queryCopy)
mas01cr@0 1816 delete queryCopy;
mas01cr@0 1817 //if(qNorm)
mas01cr@0 1818 //delete qNorm;
mas01cr@0 1819 if(D)
mas01cr@0 1820 delete[] D;
mas01cr@0 1821 if(DD)
mas01cr@0 1822 delete[] DD;
mas01cr@0 1823 if(timesdata)
mas01cr@0 1824 delete timesdata;
mas01cr@0 1825 if(meanDBdur)
mas01cr@0 1826 delete meanDBdur;
mas01cr@0 1827
mas01cr@0 1828
mas01cr@0 1829 }
mas01cr@0 1830
mas01cr@0 1831 void audioDB::normalize(double* X, int dim, int n){
mas01cr@0 1832 unsigned c = n*dim;
mas01cr@0 1833 double minval,maxval,v,*p;
mas01cr@0 1834
mas01cr@0 1835 p=X;
mas01cr@0 1836 while(c--){
mas01cr@0 1837 v=*p++;
mas01cr@0 1838 if(v<minval)
mas01cr@0 1839 minval=v;
mas01cr@0 1840 else if(v>maxval)
mas01cr@0 1841 maxval=v;
mas01cr@0 1842 }
mas01cr@0 1843
mas01cr@0 1844 normalize(X, dim, n, minval, maxval);
mas01cr@0 1845
mas01cr@0 1846 }
mas01cr@0 1847
mas01cr@0 1848 void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){
mas01cr@0 1849 unsigned c = n*dim;
mas01cr@0 1850 double *p;
mas01cr@0 1851
mas01cr@0 1852
mas01cr@0 1853 if(maxval==minval)
mas01cr@0 1854 return;
mas01cr@0 1855
mas01cr@0 1856 maxval=1.0/(maxval-minval);
mas01cr@0 1857 c=n*dim;
mas01cr@0 1858 p=X;
mas01cr@0 1859
mas01cr@0 1860 while(c--){
mas01cr@0 1861 *p=(*p-minval)*maxval;
mas01cr@0 1862 p++;
mas01cr@0 1863 }
mas01cr@0 1864 }
mas01cr@0 1865
mas01cr@0 1866 // Unit norm block of features
mas01cr@0 1867 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@0 1868 unsigned d;
mas01cr@0 1869 double L2, oneOverL2, *p;
mas01cr@0 1870 if(verbosity>2)
mas01cr@0 1871 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@0 1872 while(n--){
mas01cr@0 1873 p=X;
mas01cr@0 1874 L2=0.0;
mas01cr@0 1875 d=dim;
mas01cr@0 1876 while(d--){
mas01cr@0 1877 L2+=*p**p;
mas01cr@0 1878 p++;
mas01cr@0 1879 }
mas01cr@0 1880 L2=sqrt(L2);
mas01cr@0 1881 if(qNorm)
mas01cr@0 1882 *qNorm++=L2;
mas01cr@0 1883 oneOverL2 = 1.0/L2;
mas01cr@0 1884 d=dim;
mas01cr@0 1885 while(d--){
mas01cr@0 1886 *X*=oneOverL2;
mas01cr@0 1887 X++;
mas01cr@0 1888 }
mas01cr@0 1889 }
mas01cr@0 1890 if(verbosity>2)
mas01cr@0 1891 cerr << "done..." << endl;
mas01cr@0 1892 }
mas01cr@0 1893
mas01cr@0 1894 // Unit norm block of features
mas01cr@0 1895 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 1896 unsigned d;
mas01cr@0 1897 double L2, oneOverL2, *p;
mas01cr@0 1898 unsigned nn = n;
mas01cr@0 1899
mas01cr@0 1900 assert(l2normTable);
mas01cr@0 1901
mas01cr@0 1902 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 1903 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 1904
mas01cr@0 1905 if(verbosity>2)
mas01cr@0 1906 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@0 1907
mas01cr@0 1908 double* l2buf = new double[n];
mas01cr@0 1909 double* l2ptr = l2buf;
mas01cr@0 1910 assert(l2buf);
mas01cr@0 1911 assert(X);
mas01cr@0 1912
mas01cr@0 1913 while(nn--){
mas01cr@0 1914 p=X;
mas01cr@0 1915 *l2ptr=0.0;
mas01cr@0 1916 d=dim;
mas01cr@0 1917 while(d--){
mas01cr@0 1918 *l2ptr+=*p**p;
mas01cr@0 1919 p++;
mas01cr@0 1920 }
mas01cr@0 1921 *l2ptr=sqrt(*l2ptr);
mas01cr@0 1922 oneOverL2 = 1.0/(*l2ptr++);
mas01cr@0 1923 d=dim;
mas01cr@0 1924 while(d--){
mas01cr@0 1925 *X*=oneOverL2;
mas01cr@0 1926 X++;
mas01cr@0 1927 }
mas01cr@0 1928 }
mas01cr@0 1929 unsigned offset;
mas01cr@0 1930 if(append)
mas01cr@0 1931 offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors
mas01cr@0 1932 else
mas01cr@0 1933 offset=0;
mas01cr@0 1934 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 1935 if(l2buf)
mas01cr@0 1936 delete l2buf;
mas01cr@0 1937 if(verbosity>2)
mas01cr@0 1938 cerr << "done..." << endl;
mas01cr@0 1939 }
mas01cr@0 1940
mas01cr@0 1941
mas01cr@0 1942 // Start an audioDB server on the host
mas01cr@0 1943 void audioDB::startServer(){
mas01cr@0 1944 struct soap soap;
mas01cr@0 1945 int m, s; // master and slave sockets
mas01cr@0 1946 soap_init(&soap);
mas01cr@0 1947 m = soap_bind(&soap, NULL, port, 100);
mas01cr@0 1948 if (m < 0)
mas01cr@0 1949 soap_print_fault(&soap, stderr);
mas01cr@0 1950 else
mas01cr@0 1951 {
mas01cr@0 1952 fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
mas01cr@0 1953 for (int i = 1; ; i++)
mas01cr@0 1954 {
mas01cr@0 1955 s = soap_accept(&soap);
mas01cr@0 1956 if (s < 0)
mas01cr@0 1957 {
mas01cr@0 1958 soap_print_fault(&soap, stderr);
mas01cr@0 1959 break;
mas01cr@0 1960 }
mas01cr@0 1961 fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i,
mas01cr@0 1962 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
mas01cr@0 1963 if (soap_serve(&soap) != SOAP_OK) // process RPC request
mas01cr@0 1964 soap_print_fault(&soap, stderr); // print error
mas01cr@0 1965 fprintf(stderr, "request served\n");
mas01cr@0 1966 soap_destroy(&soap); // clean up class instances
mas01cr@0 1967 soap_end(&soap); // clean up everything and close socket
mas01cr@0 1968 }
mas01cr@0 1969 }
mas01cr@0 1970 soap_done(&soap); // close master socket and detach environment
mas01cr@0 1971 }
mas01cr@0 1972
mas01cr@0 1973
mas01cr@0 1974 // web services
mas01cr@0 1975
mas01cr@0 1976 // SERVER SIDE
mas01cr@0 1977 int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){
mas01cr@0 1978 char* const argv[]={"audioDB",COM_STATUS,dbName};
mas01cr@0 1979 const unsigned argc = 3;
mas01cr@0 1980 audioDB(argc,argv);
mas01cr@0 1981 adbCreateResult=100;
mas01cr@0 1982 return SOAP_OK;
mas01cr@0 1983 }
mas01cr@0 1984
mas01cr@0 1985 // Literal translation of command line to web service
mas01cr@0 1986
mas01cr@0 1987 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int seqLen, adb__queryResult &adbQueryResult){
mas01cr@0 1988 char queryType[256];
mas01cr@0 1989 for(int k=0; k<256; k++)
mas01cr@0 1990 queryType[k]='\0';
mas01cr@0 1991 if(qType == O2_FLAG_POINT_QUERY)
mas01cr@0 1992 strncpy(queryType, "point", strlen("point"));
mas01cr@0 1993 else if (qType == O2_FLAG_SEQUENCE_QUERY)
mas01cr@0 1994 strncpy(queryType, "sequence", strlen("sequence"));
mas01cr@0 1995 else if(qType == O2_FLAG_SEG_QUERY)
mas01cr@0 1996 strncpy(queryType,"segment", strlen("segment"));
mas01cr@0 1997 else
mas01cr@0 1998 strncpy(queryType, "", strlen(""));
mas01cr@0 1999
mas01cr@0 2000 if(pointNN==0)
mas01cr@0 2001 pointNN=10;
mas01cr@0 2002 if(segNN==0)
mas01cr@0 2003 segNN=10;
mas01cr@0 2004 if(seqLen==0)
mas01cr@0 2005 seqLen=16;
mas01cr@0 2006
mas01cr@0 2007 char qPosStr[256];
mas01cr@0 2008 sprintf(qPosStr, "%d", qPos);
mas01cr@0 2009 char pointNNStr[256];
mas01cr@0 2010 sprintf(pointNNStr,"%d",pointNN);
mas01cr@0 2011 char segNNStr[256];
mas01cr@0 2012 sprintf(segNNStr,"%d",segNN);
mas01cr@0 2013 char seqLenStr[256];
mas01cr@0 2014 sprintf(seqLenStr,"%d",seqLen);
mas01cr@0 2015
mas01cr@0 2016 const char* argv[] ={
mas01cr@0 2017 "./audioDB",
mas01cr@0 2018 COM_QUERY,
mas01cr@0 2019 queryType, // Need to pass a parameter
mas01cr@0 2020 COM_DATABASE,
mas01cr@0 2021 dbName,
mas01cr@0 2022 COM_FEATURES,
mas01cr@0 2023 qKey,
mas01cr@0 2024 COM_KEYLIST,
mas01cr@0 2025 keyList==0?"":keyList,
mas01cr@0 2026 COM_TIMES,
mas01cr@0 2027 timesFileName==0?"":timesFileName,
mas01cr@0 2028 COM_QPOINT,
mas01cr@0 2029 qPosStr,
mas01cr@0 2030 COM_POINTNN,
mas01cr@0 2031 pointNNStr,
mas01cr@0 2032 COM_SEGNN,
mas01cr@0 2033 segNNStr, // Need to pass a parameter
mas01cr@0 2034 COM_SEQLEN,
mas01cr@0 2035 seqLenStr
mas01cr@0 2036 };
mas01cr@0 2037
mas01cr@0 2038 const unsigned argc = 19;
mas01cr@0 2039 audioDB(argc, (char* const*)argv, &adbQueryResult);
mas01cr@0 2040 return SOAP_OK;
mas01cr@0 2041 }
mas01cr@0 2042
mas01cr@0 2043 int main(const unsigned argc, char* const argv[]){
mas01cr@0 2044 audioDB(argc, argv);
mas01cr@0 2045 }
mas01cr@0 2046
mas01cr@0 2047