annotate audioDB.cpp @ 24:e614ae0ebfe1 audiodb-debian

Include init.d script and defaults file for running an audiodb web services server automatically. Update changelog for this change (and for svn update)
author mas01cr
date Thu, 16 Aug 2007 10:50:34 +0000
parents 42498552b30c
children 5485586a5378
rev   line source
mas01cr@0 1 /* audioDB.cpp
mas01cr@0 2
mas01cr@0 3 audioDB version 1.0
mas01cr@0 4
mas01cr@0 5 A feature vector database management system for content-based retrieval.
mas01cr@0 6
mas01cr@0 7 Usage: audioDB [OPTIONS]...
mas01cr@0 8
mas01cr@0 9 --full-help Print help, including hidden options, and exit
mas01cr@0 10 -V, --version Print version and exit
mas01cr@0 11 -H, --help print help on audioDB usage and exit.
mas01cr@0 12 -v, --verbosity=detail level of detail of operational information.
mas01cr@0 13 (default=`1')
mas01cr@0 14
mas01cr@0 15 Database Setup:
mas01cr@0 16 All database operations require a database argument.
mas01cr@0 17
mas01cr@0 18 Database commands are UPPER CASE. Command options are lower case.
mas01cr@0 19
mas01cr@0 20 -d, --database=filename database file required by Database commands.
mas01cr@0 21 -N, --NEW make a new (initially empty) database.
mas01cr@0 22 -S, --STATUS output database information to stdout.
mas01cr@0 23 -D, --DUMP output all entries: index key size.
mas01cr@0 24 -L, --L2NORM unit norm vectors and norm all future inserts.
mas01cr@0 25
mas01cr@0 26 Database Insertion:
mas01cr@0 27 The following commands insert feature files, with optional keys and
mas01cr@0 28 timestamps.
mas01cr@0 29
mas01cr@0 30 -I, --INSERT add feature vectors to an existing database.
mas01cr@0 31 -U, --UPDATE replace inserted vectors associated with key
mas01cr@0 32 with new input vectors.
mas01cr@0 33 -f, --features=filename binary series of vectors file {int sz:ieee
mas01cr@0 34 double[][sz]:eof}.
mas01cr@0 35 -t, --times=filename list of time points (ascii) for feature vectors.
mas01cr@0 36 -k, --key=identifier unique identifier associated with features.
mas01cr@0 37
mas01cr@0 38 -B, --BATCHINSERT add feature vectors named in a --featureList
mas01cr@0 39 file (with optional keys in a --keyList file)
mas01cr@0 40 to the named database.
mas01cr@0 41 -F, --featureList=filename text file containing list of binary feature
mas01cr@0 42 vector files to process
mas01cr@0 43 -T, --timesList=filename text file containing list of ascii --times for
mas01cr@0 44 each --features file in --featureList.
mas01cr@0 45 -K, --keyList=filename text file containing list of unique identifiers
mas01cr@0 46 associated with --features.
mas01cr@0 47
mas01cr@0 48 Database Search:
mas01cr@0 49 Thse commands control the retrieval behaviour.
mas01cr@0 50
mas01cr@0 51 -Q, --QUERY=searchtype content-based search on --database using
mas01cr@0 52 --features as a query. Optionally restrict the
mas01cr@23 53 search to those tracks identified in a
mas01cr@0 54 --keyList. (possible values="point",
mas01cr@23 55 "track", "sequence")
mas01cr@0 56 -p, --qpoint=position ordinal position of query start point in
mas01cr@0 57 --features file. (default=`0')
mas01cr@0 58 -e, --exhaustive exhaustive search: iterate through all query
mas01cr@0 59 vectors in search. Overrides --qpoint.
mas01cr@0 60 (default=off)
mas01cr@0 61 -n, --pointnn=numpoints number of point nearest neighbours to use in
mas01cr@0 62 retrieval. (default=`10')
mas01cr@0 63 -R, --radius=DOUBLE radius search, returns all
mas01cr@23 64 points/tracks/sequences inside given radius.
mas01cr@0 65 (default=`1.0')
mas01cr@0 66 -x, --expandfactor=DOUBLE time compress/expand factor of result length to
mas01cr@0 67 query length [1.0 .. 100.0]. (default=`1.1')
mas01cr@0 68 -o, --rotate rotate query vectors for rotationally invariant
mas01cr@0 69 search. (default=off)
mas01cr@0 70 -r, --resultlength=length maximum length of the result list.
mas01cr@0 71 (default=`10')
mas01cr@0 72 -l, --sequencelength=length length of sequences for sequence search.
mas01cr@0 73 (default=`16')
mas01cr@0 74 -h, --sequencehop=hop hop size of sequence window for sequence search.
mas01cr@0 75 (default=`1')
mas01cr@0 76
mas01cr@0 77 Web Services:
mas01cr@0 78 These commands enable the database process to establish a connection via the
mas01cr@0 79 internet and operate as separate client and server processes.
mas01cr@0 80
mas01cr@0 81 -s, --SERVER=port run as standalone web service on named port.
mas01cr@0 82 (default=`80011')
mas01cr@0 83 -c, --client=hostname:port run as a client using named host service.
mas01cr@0 84
mas01cr@0 85 Copyright (C) 2007 Michael Casey, Goldsmiths, University of London
mas01cr@0 86
mas01cr@0 87 outputs:
mas01cr@0 88
mas01cr@0 89 key1 distance1 qpos1 spos1
mas01cr@0 90 key2 distance2 qpos2 spos2
mas01cr@0 91 ...
mas01cr@0 92 keyN distanceN qposN sposN
mas01cr@0 93
mas01cr@0 94 */
mas01cr@0 95
mas01cr@0 96 #include "audioDB.h"
mas01cr@0 97
mas01cr@0 98 #define O2_DEBUG
mas01cr@0 99
mas01cr@0 100 void audioDB::error(const char* a, const char* b){
mas01cr@0 101 cerr << a << ":" << b << endl;
mas01cr@0 102 exit(1);
mas01cr@0 103 }
mas01cr@0 104
mas01cr@0 105 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult):
mas01cr@0 106 dim(0),
mas01cr@0 107 dbName(0),
mas01cr@0 108 inFile(0),
mas01cr@0 109 key(0),
mas01cr@23 110 trackFile(0),
mas01cr@23 111 trackFileName(0),
mas01cr@0 112 timesFile(0),
mas01cr@0 113 timesFileName(0),
mas01cr@0 114 usingTimes(0),
mas01cr@0 115 command(0),
mas01cr@0 116 dbfid(0),
mas01cr@0 117 db(0),
mas01cr@0 118 dbH(0),
mas01cr@0 119 infid(0),
mas01cr@0 120 indata(0),
mas01cr@0 121 queryType(O2_FLAG_POINT_QUERY),
mas01cr@0 122 verbosity(1),
mas01cr@0 123 pointNN(O2_DEFAULT_POINTNN),
mas01cr@23 124 trackNN(O2_DEFAULT_TRACKNN),
mas01cr@23 125 trackTable(0),
mas01cr@0 126 fileTable(0),
mas01cr@0 127 dataBuf(0),
mas01cr@0 128 l2normTable(0),
mas01cr@0 129 timesTable(0),
mas01cr@0 130 qNorm(0),
mas01cr@0 131 sequenceLength(16),
mas01cr@0 132 sequenceHop(1),
mas01cr@0 133 queryPoint(0),
mas01cr@0 134 usingQueryPoint(0),
mas01cr@0 135 isClient(0),
mas01cr@0 136 isServer(0),
mas01cr@0 137 port(0),
mas01cr@23 138 timesTol(0.1),
mas01cr@23 139 radius(0){
mas01cr@0 140
mas01cr@0 141 if(processArgs(argc, argv)<0){
mas01cr@0 142 printf("No command found.\n");
mas01cr@0 143 cmdline_parser_print_version ();
mas01cr@0 144 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 145 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 146 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 147 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 148 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 149 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 150 exit(1);
mas01cr@0 151 }
mas01cr@0 152
mas01cr@0 153 if(O2_ACTION(COM_SERVER))
mas01cr@0 154 startServer();
mas01cr@0 155
mas01cr@0 156 else if(O2_ACTION(COM_CREATE))
mas01cr@0 157 create(dbName);
mas01cr@0 158
mas01cr@0 159 else if(O2_ACTION(COM_INSERT))
mas01cr@0 160 insert(dbName, inFile);
mas01cr@0 161
mas01cr@0 162 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 163 batchinsert(dbName, inFile);
mas01cr@0 164
mas01cr@0 165 else if(O2_ACTION(COM_QUERY))
mas01cr@0 166 if(isClient)
mas01cr@0 167 ws_query(dbName, inFile, (char*)hostport);
mas01cr@0 168 else
mas01cr@0 169 query(dbName, inFile, adbQueryResult);
mas01cr@0 170
mas01cr@0 171 else if(O2_ACTION(COM_STATUS))
mas01cr@0 172 if(isClient)
mas01cr@0 173 ws_status(dbName,(char*)hostport);
mas01cr@0 174 else
mas01cr@0 175 status(dbName);
mas01cr@0 176
mas01cr@0 177 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 178 l2norm(dbName);
mas01cr@0 179
mas01cr@0 180 else if(O2_ACTION(COM_DUMP))
mas01cr@0 181 dump(dbName);
mas01cr@0 182
mas01cr@0 183 else
mas01cr@0 184 error("Unrecognized command",command);
mas01cr@0 185 }
mas01cr@0 186
mas01cr@0 187 audioDB::~audioDB(){
mas01cr@0 188 // Clean up
mas01cr@0 189 if(indata)
mas01cr@0 190 munmap(indata,statbuf.st_size);
mas01cr@0 191 if(db)
mas01cr@0 192 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 193 if(dbfid>0)
mas01cr@0 194 close(dbfid);
mas01cr@0 195 if(infid>0)
mas01cr@0 196 close(infid);
mas01cr@0 197 if(dbH)
mas01cr@0 198 delete dbH;
mas01cr@0 199 }
mas01cr@0 200
mas01cr@0 201 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 202
mas01cr@0 203 if(argc<2){
mas01cr@0 204 cmdline_parser_print_version ();
mas01cr@0 205 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 206 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 207 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 208 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 209 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 210 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 211 exit(0);
mas01cr@0 212 }
mas01cr@0 213
mas01cr@0 214 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@0 215 exit(1) ;
mas01cr@0 216
mas01cr@0 217 if(args_info.help_given){
mas01cr@0 218 cmdline_parser_print_help();
mas01cr@0 219 exit(0);
mas01cr@0 220 }
mas01cr@0 221
mas01cr@0 222 if(args_info.verbosity_given){
mas01cr@0 223 verbosity=args_info.verbosity_arg;
mas01cr@0 224 if(verbosity<0 || verbosity>10){
mas01cr@0 225 cerr << "Warning: verbosity out of range, setting to 1" << endl;
mas01cr@0 226 verbosity=1;
mas01cr@0 227 }
mas01cr@0 228 }
mas01cr@0 229
mas01cr@23 230 if(args_info.radius_given){
mas01cr@23 231 radius=args_info.radius_arg;
mas01cr@23 232 if(radius<=0 || radius>1000000000){
mas01cr@23 233 cerr << "Warning: radius out of range" << endl;
mas01cr@23 234 exit(1);
mas01cr@23 235 }
mas01cr@23 236 else
mas01cr@23 237 if(verbosity>3)
mas01cr@23 238 cerr << "Setting radius to " << radius << endl;
mas01cr@23 239 }
mas01cr@23 240
mas01cr@0 241 if(args_info.SERVER_given){
mas01cr@0 242 command=COM_SERVER;
mas01cr@0 243 port=args_info.SERVER_arg;
mas01cr@0 244 if(port<100 || port > 100000)
mas01cr@0 245 error("port out of range");
mas01cr@0 246 isServer=1;
mas01cr@0 247 return 0;
mas01cr@0 248 }
mas01cr@0 249
mas01cr@0 250 // No return on client command, find database command
mas01cr@0 251 if(args_info.client_given){
mas01cr@0 252 command=COM_CLIENT;
mas01cr@0 253 hostport=args_info.client_arg;
mas01cr@0 254 isClient=1;
mas01cr@0 255 }
mas01cr@0 256
mas01cr@0 257 if(args_info.NEW_given){
mas01cr@0 258 command=COM_CREATE;
mas01cr@0 259 dbName=args_info.database_arg;
mas01cr@0 260 return 0;
mas01cr@0 261 }
mas01cr@0 262
mas01cr@0 263 if(args_info.STATUS_given){
mas01cr@0 264 command=COM_STATUS;
mas01cr@0 265 dbName=args_info.database_arg;
mas01cr@0 266 return 0;
mas01cr@0 267 }
mas01cr@0 268
mas01cr@0 269 if(args_info.DUMP_given){
mas01cr@0 270 command=COM_DUMP;
mas01cr@0 271 dbName=args_info.database_arg;
mas01cr@0 272 return 0;
mas01cr@0 273 }
mas01cr@0 274
mas01cr@0 275 if(args_info.L2NORM_given){
mas01cr@0 276 command=COM_L2NORM;
mas01cr@0 277 dbName=args_info.database_arg;
mas01cr@0 278 return 0;
mas01cr@0 279 }
mas01cr@0 280
mas01cr@0 281 if(args_info.INSERT_given){
mas01cr@0 282 command=COM_INSERT;
mas01cr@0 283 dbName=args_info.database_arg;
mas01cr@0 284 inFile=args_info.features_arg;
mas01cr@0 285 if(args_info.key_given)
mas01cr@0 286 key=args_info.key_arg;
mas01cr@0 287 if(args_info.times_given){
mas01cr@0 288 timesFileName=args_info.times_arg;
mas01cr@0 289 if(strlen(timesFileName)>0){
mas01cr@0 290 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 291 error("Could not open times file for reading", timesFileName);
mas01cr@0 292 usingTimes=1;
mas01cr@0 293 }
mas01cr@0 294 }
mas01cr@0 295 return 0;
mas01cr@0 296 }
mas01cr@15 297
mas01cr@0 298 if(args_info.BATCHINSERT_given){
mas01cr@0 299 command=COM_BATCHINSERT;
mas01cr@0 300 dbName=args_info.database_arg;
mas01cr@0 301 inFile=args_info.featureList_arg;
mas01cr@0 302 if(args_info.keyList_given)
mas01cr@0 303 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 304
mas01cr@0 305 /* TO DO: REPLACE WITH
mas01cr@0 306 if(args_info.keyList_given){
mas01cr@23 307 trackFileName=args_info.keyList_arg;
mas01cr@23 308 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01cr@23 309 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 310 }
mas01cr@0 311 AND UPDATE BATCHINSERT()
mas01cr@0 312 */
mas01cr@0 313
mas01cr@0 314 if(args_info.timesList_given){
mas01cr@0 315 timesFileName=args_info.timesList_arg;
mas01cr@0 316 if(strlen(timesFileName)>0){
mas01cr@0 317 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 318 error("Could not open timesList file for reading", timesFileName);
mas01cr@0 319 usingTimes=1;
mas01cr@0 320 }
mas01cr@0 321 }
mas01cr@0 322 return 0;
mas01cr@0 323 }
mas01cr@0 324
mas01cr@0 325 // Query command and arguments
mas01cr@0 326 if(args_info.QUERY_given){
mas01cr@0 327 command=COM_QUERY;
mas01cr@0 328 dbName=args_info.database_arg;
mas01cr@0 329 inFile=args_info.features_arg;
mas01cr@0 330
mas01cr@0 331 if(args_info.keyList_given){
mas01cr@23 332 trackFileName=args_info.keyList_arg;
mas01cr@23 333 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01cr@23 334 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 335 }
mas01cr@0 336
mas01cr@0 337 if(args_info.times_given){
mas01cr@0 338 timesFileName=args_info.times_arg;
mas01cr@0 339 if(strlen(timesFileName)>0){
mas01cr@0 340 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@0 341 error("Could not open times file for reading", timesFileName);
mas01cr@0 342 usingTimes=1;
mas01cr@0 343 }
mas01cr@0 344 }
mas01cr@0 345
mas01cr@0 346 // query type
mas01cr@23 347 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@23 348 queryType=O2_FLAG_TRACK_QUERY;
mas01cr@0 349 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@0 350 queryType=O2_FLAG_POINT_QUERY;
mas01cr@0 351 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@0 352 queryType=O2_FLAG_SEQUENCE_QUERY;
mas01cr@0 353 else
mas01cr@0 354 error("unsupported query type",args_info.QUERY_arg);
mas01cr@0 355
mas01cr@0 356 if(!args_info.exhaustive_flag){
mas01cr@0 357 queryPoint = args_info.qpoint_arg;
mas01cr@0 358 usingQueryPoint=1;
mas01cr@0 359 if(queryPoint<0 || queryPoint >10000)
mas01cr@0 360 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@0 361 }
mas01cr@0 362
mas01cr@0 363
mas01cr@0 364 pointNN=args_info.pointnn_arg;
mas01cr@0 365 if(pointNN<1 || pointNN >1000)
mas01cr@0 366 error("pointNN out of range: 1 <= pointNN <= 1000");
mas01cr@0 367
mas01cr@0 368
mas01cr@0 369
mas01cr@23 370 trackNN=args_info.resultlength_arg;
mas01cr@23 371 if(trackNN<1 || trackNN >10000)
mas01cr@0 372 error("resultlength out of range: 1 <= resultlength <= 1000");
mas01cr@0 373
mas01cr@0 374
mas01cr@0 375 sequenceLength=args_info.sequencelength_arg;
mas01cr@0 376 if(sequenceLength<1 || sequenceLength >1000)
mas01cr@0 377 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@0 378
mas01cr@0 379 sequenceHop=args_info.sequencehop_arg;
mas01cr@0 380 if(sequenceHop<1 || sequenceHop >1000)
mas01cr@0 381 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01cr@0 382
mas01cr@0 383 return 0;
mas01cr@0 384 }
mas01cr@0 385 return -1; // no command found
mas01cr@0 386 }
mas01cr@0 387
mas01cr@0 388 /* Make a new database
mas01cr@0 389
mas01cr@0 390 The database consists of:
mas01cr@0 391
mas01cr@0 392 header
mas01cr@0 393 ---------------------------------------------------------------------------------
mas01cr@0 394 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
mas01cr@0 395 ---------------------------------------------------------------------------------
mas01cr@0 396
mas01cr@0 397
mas01cr@23 398 keyTable : list of keys of tracks
mas01cr@0 399 --------------------------------------------------------------------------
mas01cr@0 400 | key 256 bytes |
mas01cr@0 401 --------------------------------------------------------------------------
mas01cr@0 402 O2_MAXFILES*02_FILENAMELENGTH
mas01cr@0 403
mas01cr@23 404 trackTable : Maps implicit feature index to a feature vector matrix
mas01cr@0 405 --------------------------------------------------------------------------
mas01cr@0 406 | numVectors (4 bytes) |
mas01cr@0 407 --------------------------------------------------------------------------
mas01cr@0 408 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(INT)
mas01cr@0 409
mas01cr@0 410 featureTable
mas01cr@0 411 --------------------------------------------------------------------------
mas01cr@0 412 | v1 v2 v3 ... vd (double) |
mas01cr@0 413 --------------------------------------------------------------------------
mas01cr@0 414 O2_MAXFILES * 02_MEANNUMFEATURES * DIM * sizeof(DOUBLE)
mas01cr@0 415
mas01cr@0 416 timesTable
mas01cr@0 417 --------------------------------------------------------------------------
mas01cr@0 418 | timestamp (double) |
mas01cr@0 419 --------------------------------------------------------------------------
mas01cr@0 420 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)
mas01cr@0 421
mas01cr@0 422 l2normTable
mas01cr@0 423 --------------------------------------------------------------------------
mas01cr@0 424 | nm (double) |
mas01cr@0 425 --------------------------------------------------------------------------
mas01cr@0 426 O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)
mas01cr@0 427
mas01cr@0 428 */
mas01cr@0 429
mas01cr@0 430 void audioDB::create(const char* dbName){
mas01cr@15 431 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01cr@15 432 error("Can't open database file", dbName);
mas01cr@0 433
mas01cr@0 434 // go to the location corresponding to the last byte
mas01cr@0 435 if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
mas01cr@0 436 error("lseek error in db file");
mas01cr@0 437
mas01cr@0 438 // write a dummy byte at the last location
mas01cr@0 439 if (write (dbfid, "", 1) != 1)
mas01cr@0 440 error("write error");
mas01cr@0 441
mas01cr@0 442 // mmap the output file
mas01cr@0 443 if(verbosity)
mas01cr@0 444 cerr << "header size:" << O2_HEADERSIZE << endl;
mas01cr@0 445 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@0 446 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@0 447 error("mmap error for creating database");
mas01cr@0 448
mas01cr@0 449 dbH = new dbTableHeaderT();
mas01cr@0 450 assert(dbH);
mas01cr@0 451
mas01cr@0 452 // Initialize header
mas01cr@0 453 dbH->magic=O2_MAGIC;
mas01cr@0 454 dbH->numFiles=0;
mas01cr@0 455 dbH->length=0;
mas01cr@0 456 dbH->dim=0;
mas01cr@0 457 dbH->flags=0; //O2_FLAG_L2NORM;
mas01cr@0 458
mas01cr@0 459 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 460 if(verbosity)
mas01cr@0 461 cerr << COM_CREATE << " " << dbName << endl;
mas01cr@0 462
mas01cr@0 463 }
mas01cr@0 464
mas01cr@0 465
mas01cr@0 466 void audioDB::drop(){
mas01cr@0 467
mas01cr@0 468
mas01cr@0 469 }
mas01cr@0 470
mas01cr@0 471 // initTables - memory map files passed as arguments
mas01cr@0 472 // Precondition: database has already been created
mas01cr@0 473 void audioDB::initTables(const char* dbName, const char* inFile=0){
mas01cr@0 474 if ((dbfid = open (dbName, O_RDWR)) < 0)
mas01cr@0 475 error("Can't open database file:", dbName);
mas01cr@0 476
mas01cr@0 477 // open the input file
mas01cr@0 478 if (inFile && (infid = open (inFile, O_RDONLY)) < 0)
mas01cr@15 479 error("can't open input file for reading", inFile);
mas01cr@0 480
mas01cr@0 481 // find size of input file
mas01cr@0 482 if (inFile && fstat (infid,&statbuf) < 0)
mas01cr@0 483 error("fstat error finding size of input");
mas01cr@0 484
mas01cr@0 485 // Get the database header info
mas01cr@0 486 dbH = new dbTableHeaderT();
mas01cr@0 487 assert(dbH);
mas01cr@0 488
mas01cr@0 489 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
mas01cr@0 490 error("error reading db header");
mas01cr@0 491
mas01cr@0 492 fileTableOffset = O2_HEADERSIZE;
mas01cr@23 493 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
mas01cr@23 494 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
mas01cr@0 495 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 496 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 497
mas01cr@0 498 if(dbH->magic!=O2_MAGIC){
mas01cr@0 499 cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl;
mas01cr@0 500 error("database file has incorrect header",dbName);
mas01cr@0 501 }
mas01cr@0 502
mas01cr@0 503 if(inFile)
mas01cr@0 504 if(dbH->dim==0 && dbH->length==0) // empty database
mas01cr@0 505 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
mas01cr@0 506 else {
mas01cr@0 507 unsigned test;
mas01cr@0 508 read(infid,&test,sizeof(unsigned));
mas01cr@0 509 if(dbH->dim!=test){
mas01cr@0 510 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
mas01cr@0 511 error("feature dimensions do not match database table dimensions");
mas01cr@0 512 }
mas01cr@0 513 }
mas01cr@0 514
mas01cr@0 515 // mmap the input file
mas01cr@0 516 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 517 == (caddr_t) -1)
mas01cr@0 518 error("mmap error for input");
mas01cr@0 519
mas01cr@0 520 // mmap the database file
mas01cr@0 521 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@0 522 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@0 523 error("mmap error for creating database");
mas01cr@0 524
mas01cr@0 525 // Make some handy tables with correct types
mas01cr@0 526 fileTable= (char*)(db+fileTableOffset);
mas01cr@23 527 trackTable = (unsigned*)(db+trackTableOffset);
mas01cr@0 528 dataBuf = (double*)(db+dataoffset);
mas01cr@0 529 l2normTable = (double*)(db+l2normTableOffset);
mas01cr@0 530 timesTable = (double*)(db+timesTableOffset);
mas01cr@0 531
mas01cr@0 532 }
mas01cr@0 533
mas01cr@0 534 void audioDB::insert(const char* dbName, const char* inFile){
mas01cr@0 535
mas01cr@0 536 initTables(dbName, inFile);
mas01cr@0 537
mas01cr@0 538 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 539 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 540
mas01cr@0 541 // Check that there is room for at least 1 more file
mas01cr@0 542 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
mas01cr@0 543 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 544
mas01cr@0 545 if(!key)
mas01cr@0 546 key=inFile;
mas01cr@0 547 // Linear scan of filenames check for pre-existing feature
mas01cr@0 548 unsigned alreadyInserted=0;
mas01cr@0 549 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 550 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
mas01cr@0 551 alreadyInserted=1;
mas01cr@0 552 break;
mas01cr@0 553 }
mas01cr@0 554
mas01cr@0 555 if(alreadyInserted){
mas01cr@0 556 if(verbosity)
mas01cr@0 557 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
mas01cr@0 558 return;
mas01cr@0 559 }
mas01cr@0 560
mas01cr@23 561 // Make a track index table of features to file indexes
mas01cr@0 562 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 563 if(!numVectors){
mas01cr@0 564 if(verbosity)
mas01cr@0 565 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
mas01cr@0 566 // CLEAN UP
mas01cr@0 567 munmap(indata,statbuf.st_size);
mas01cr@0 568 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 569 close(infid);
mas01cr@0 570 return;
mas01cr@0 571 }
mas01cr@0 572
mas01cr@0 573 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@0 574
mas01cr@0 575 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 576
mas01cr@0 577 // Check times status and insert times from file
mas01cr@0 578 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 579 double* timesdata=timesTable+timesoffset;
mas01cr@0 580 assert(timesdata+numVectors<l2normTable);
mas01cr@0 581 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 582
mas01cr@0 583 // Increment file count
mas01cr@0 584 dbH->numFiles++;
mas01cr@0 585
mas01cr@0 586 // Update Header information
mas01cr@0 587 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 588
mas01cr@0 589 // Copy the header back to the database
mas01cr@0 590 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 591
mas01cr@23 592 // Update track to file index map
mas01cr@23 593 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01cr@23 594 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 595
mas01cr@0 596 // Update the feature database
mas01cr@0 597 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 598
mas01cr@0 599 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 600 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 601 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 602
mas01cr@0 603 // Report status
mas01cr@0 604 status(dbName);
mas01cr@0 605 if(verbosity)
mas01cr@0 606 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
mas01cr@0 607 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
mas01cr@0 608
mas01cr@0 609 // CLEAN UP
mas01cr@0 610 munmap(indata,statbuf.st_size);
mas01cr@0 611 close(infid);
mas01cr@0 612 }
mas01cr@0 613
mas01cr@0 614 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
mas01cr@0 615 unsigned numtimes=0;
mas01cr@0 616 if(usingTimes){
mas01cr@0 617 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
mas01cr@0 618 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@0 619 else if(!(dbH->flags&O2_FLAG_TIMES)){
mas01cr@0 620 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
mas01cr@0 621 usingTimes=0;
mas01cr@0 622 }
mas01cr@0 623
mas01cr@0 624 if(!timesFile->is_open()){
mas01cr@0 625 if(dbH->flags & O2_FLAG_TIMES){
mas01cr@0 626 munmap(indata,statbuf.st_size);
mas01cr@0 627 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 628 error("problem opening times file on timestamped database",timesFileName);
mas01cr@0 629 }
mas01cr@0 630 else{
mas01cr@0 631 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
mas01cr@0 632 usingTimes=0;
mas01cr@0 633 }
mas01cr@0 634 }
mas01cr@0 635
mas01cr@0 636 // Process time file
mas01cr@0 637 if(usingTimes){
mas01cr@0 638 do{
mas01cr@0 639 *timesFile>>*timesdata++;
mas01cr@0 640 if(timesFile->eof())
mas01cr@0 641 break;
mas01cr@0 642 numtimes++;
mas01cr@0 643 }while(!timesFile->eof() && numtimes<numVectors);
mas01cr@0 644 if(!timesFile->eof()){
mas01cr@0 645 double dummy;
mas01cr@0 646 do{
mas01cr@0 647 *timesFile>>dummy;
mas01cr@0 648 if(timesFile->eof())
mas01cr@0 649 break;
mas01cr@0 650 numtimes++;
mas01cr@0 651 }while(!timesFile->eof());
mas01cr@0 652 }
mas01cr@0 653 if(numtimes<numVectors || numtimes>numVectors+2){
mas01cr@0 654 munmap(indata,statbuf.st_size);
mas01cr@0 655 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 656 close(infid);
mas01cr@0 657 cerr << "expected " << numVectors << " found " << numtimes << endl;
mas01cr@0 658 error("Times file is incorrect length for features file",inFile);
mas01cr@0 659 }
mas01cr@0 660 if(verbosity>2)
mas01cr@0 661 cerr << "numtimes: " << numtimes << endl;
mas01cr@0 662 }
mas01cr@0 663 }
mas01cr@0 664 }
mas01cr@0 665
mas01cr@0 666 void audioDB::batchinsert(const char* dbName, const char* inFile){
mas01cr@0 667
mas01cr@0 668 if ((dbfid = open (dbName, O_RDWR)) < 0)
mas01cr@0 669 error("Can't open database file:", dbName);
mas01cr@0 670
mas01cr@0 671 if(!key)
mas01cr@0 672 key=inFile;
mas01cr@0 673 ifstream *filesIn = 0;
mas01cr@0 674 ifstream *keysIn = 0;
mas01cr@0 675 ifstream* thisTimesFile = 0;
mas01cr@0 676
mas01cr@0 677 if(!(filesIn = new ifstream(inFile)))
mas01cr@0 678 error("Could not open batch in file", inFile);
mas01cr@0 679 if(key && key!=inFile)
mas01cr@0 680 if(!(keysIn = new ifstream(key)))
mas01cr@0 681 error("Could not open batch key file",key);
mas01cr@0 682
mas01cr@0 683 // Get the database header info
mas01cr@0 684 dbH = new dbTableHeaderT();
mas01cr@0 685 assert(dbH);
mas01cr@0 686
mas01cr@0 687 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
mas01cr@0 688 error("error reading db header");
mas01cr@0 689
mas01cr@0 690 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 691 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 692
mas01cr@0 693 fileTableOffset = O2_HEADERSIZE;
mas01cr@23 694 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
mas01cr@23 695 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
mas01cr@0 696 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 697 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
mas01cr@0 698
mas01cr@0 699 if(dbH->magic!=O2_MAGIC){
mas01cr@0 700 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
mas01cr@0 701 error("database file has incorrect header",dbName);
mas01cr@0 702 }
mas01cr@0 703
mas01cr@0 704
mas01cr@0 705 unsigned totalVectors=0;
mas01cr@0 706 char *thisKey = new char[MAXSTR];
mas01cr@0 707 char *thisFile = new char[MAXSTR];
mas01cr@0 708 char *thisTimesFileName = new char[MAXSTR];
mas01cr@0 709
mas01cr@0 710 do{
mas01cr@0 711 filesIn->getline(thisFile,MAXSTR);
mas01cr@0 712 if(key && key!=inFile)
mas01cr@0 713 keysIn->getline(thisKey,MAXSTR);
mas01cr@0 714 else
mas01cr@0 715 thisKey = thisFile;
mas01cr@0 716 if(usingTimes)
mas01cr@0 717 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@0 718
mas01cr@0 719 if(filesIn->eof())
mas01cr@0 720 break;
mas01cr@0 721
mas01cr@0 722 // open the input file
mas01cr@0 723 if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0)
mas01cr@0 724 error("can't open feature file for reading", thisFile);
mas01cr@0 725
mas01cr@0 726 // find size of input file
mas01cr@0 727 if (thisFile && fstat (infid,&statbuf) < 0)
mas01cr@0 728 error("fstat error finding size of input");
mas01cr@0 729
mas01cr@15 730 // mmap the database file
mas01cr@15 731 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@15 732 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@15 733 error("mmap error for creating database");
mas01cr@15 734
mas01cr@15 735 // Make some handy tables with correct types
mas01cr@15 736 fileTable= (char*)(db+fileTableOffset);
mas01cr@23 737 trackTable = (unsigned*)(db+trackTableOffset);
mas01cr@15 738 dataBuf = (double*)(db+dataoffset);
mas01cr@15 739 l2normTable = (double*)(db+l2normTableOffset);
mas01cr@15 740 timesTable = (double*)(db+timesTableOffset);
mas01cr@15 741
mas01cr@0 742 // Check that there is room for at least 1 more file
mas01cr@0 743 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
mas01cr@0 744 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 745
mas01cr@0 746 if(thisFile)
mas01cr@0 747 if(dbH->dim==0 && dbH->length==0) // empty database
mas01cr@0 748 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
mas01cr@0 749 else {
mas01cr@0 750 unsigned test;
mas01cr@0 751 read(infid,&test,sizeof(unsigned));
mas01cr@0 752 if(dbH->dim!=test){
mas01cr@0 753 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
mas01cr@0 754 error("feature dimensions do not match database table dimensions");
mas01cr@0 755 }
mas01cr@0 756 }
mas01cr@0 757
mas01cr@0 758 // mmap the input file
mas01cr@0 759 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 760 == (caddr_t) -1)
mas01cr@0 761 error("mmap error for input");
mas01cr@0 762
mas01cr@0 763
mas01cr@0 764 // Linear scan of filenames check for pre-existing feature
mas01cr@0 765 unsigned alreadyInserted=0;
mas01cr@0 766
mas01cr@0 767 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 768 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
mas01cr@0 769 alreadyInserted=1;
mas01cr@0 770 break;
mas01cr@0 771 }
mas01cr@0 772
mas01cr@0 773 if(alreadyInserted){
mas01cr@0 774 if(verbosity)
mas01cr@0 775 cerr << "Warning: key already exists in database:" << thisKey << endl;
mas01cr@0 776 }
mas01cr@0 777 else{
mas01cr@0 778
mas01cr@23 779 // Make a track index table of features to file indexes
mas01cr@0 780 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 781 if(!numVectors){
mas01cr@0 782 if(verbosity)
mas01cr@0 783 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
mas01cr@0 784 }
mas01cr@0 785 else{
mas01cr@0 786 if(usingTimes){
mas01cr@0 787 if(timesFile->eof())
mas01cr@0 788 error("not enough timestamp files in timesList");
mas01cr@0 789 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
mas01cr@0 790 if(!thisTimesFile->is_open())
mas01cr@0 791 error("Cannot open timestamp file",thisTimesFileName);
mas01cr@0 792 unsigned insertoffset=dbH->length;
mas01cr@0 793 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 794 double* timesdata=timesTable+timesoffset;
mas01cr@0 795 assert(timesdata+numVectors<l2normTable);
mas01cr@0 796 insertTimeStamps(numVectors,thisTimesFile,timesdata);
mas01cr@0 797 if(thisTimesFile)
mas01cr@0 798 delete thisTimesFile;
mas01cr@0 799 }
mas01cr@0 800
mas01cr@0 801 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@0 802
mas01cr@0 803 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 804
mas01cr@0 805 // Increment file count
mas01cr@0 806 dbH->numFiles++;
mas01cr@0 807
mas01cr@0 808 // Update Header information
mas01cr@0 809 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 810 // Copy the header back to the database
mas01cr@0 811 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 812
mas01cr@23 813 // Update track to file index map
mas01cr@23 814 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01cr@23 815 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 816
mas01cr@0 817 // Update the feature database
mas01cr@0 818 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 819
mas01cr@0 820 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 821 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 822 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 823
mas01cr@0 824 totalVectors+=numVectors;
mas01cr@0 825 }
mas01cr@0 826 }
mas01cr@0 827 // CLEAN UP
mas01cr@0 828 munmap(indata,statbuf.st_size);
mas01cr@0 829 close(infid);
mas01cr@15 830 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 831 }while(!filesIn->eof());
mas01cr@15 832
mas01cr@15 833 // mmap the database file
mas01cr@15 834 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
mas01cr@15 835 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@15 836 error("mmap error for creating database");
mas01cr@0 837
mas01cr@0 838 if(verbosity)
mas01cr@0 839 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
mas01cr@0 840 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
mas01cr@0 841
mas01cr@0 842 // Report status
mas01cr@0 843 status(dbName);
mas01cr@15 844
mas01cr@15 845 munmap(db,O2_DEFAULTDBSIZE);
mas01cr@0 846 }
mas01cr@0 847
mas01cr@0 848 void audioDB::ws_status(const char*dbName, char* hostport){
mas01cr@0 849 struct soap soap;
mas01cr@0 850 int adbStatusResult;
mas01cr@0 851
mas01cr@0 852 // Query an existing adb database
mas01cr@0 853 soap_init(&soap);
mas01cr@0 854 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK)
mas01cr@0 855 std::cout << "result = " << adbStatusResult << std::endl;
mas01cr@0 856 else
mas01cr@0 857 soap_print_fault(&soap,stderr);
mas01cr@0 858
mas01cr@0 859 soap_destroy(&soap);
mas01cr@0 860 soap_end(&soap);
mas01cr@0 861 soap_done(&soap);
mas01cr@0 862 }
mas01cr@0 863
mas01cr@23 864 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
mas01cr@0 865 struct soap soap;
mas01cr@0 866 adb__queryResult adbQueryResult;
mas01cr@0 867
mas01cr@0 868 soap_init(&soap);
mas01cr@0 869 if(soap_call_adb__query(&soap,hostport,NULL,
mas01cr@23 870 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
mas01cr@23 871 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult)==SOAP_OK){
mas01cr@0 872 //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl;
mas01cr@0 873 for(int i=0; i<adbQueryResult.__sizeRlist; i++)
mas01cr@0 874 std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i]
mas01cr@0 875 << " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl;
mas01cr@0 876 }
mas01cr@0 877 else
mas01cr@0 878 soap_print_fault(&soap,stderr);
mas01cr@0 879
mas01cr@0 880 soap_destroy(&soap);
mas01cr@0 881 soap_end(&soap);
mas01cr@0 882 soap_done(&soap);
mas01cr@0 883
mas01cr@0 884 }
mas01cr@0 885
mas01cr@0 886
mas01cr@0 887 void audioDB::status(const char* dbName){
mas01cr@0 888 if(!dbH)
mas01cr@0 889 initTables(dbName, 0);
mas01cr@0 890
mas01cr@0 891 // Update Header information
mas01cr@0 892 cout << "num files:" << dbH->numFiles << endl;
mas01cr@0 893 cout << "data dim:" << dbH->dim <<endl;
mas01cr@0 894 if(dbH->dim>0){
mas01cr@0 895 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
mas01cr@0 896 cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
mas01cr@0 897 }
mas01cr@0 898 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
mas01cr@0 899 cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" <<
mas01cr@0 900 (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
mas01cr@0 901 cout << "flags:" << dbH->flags << endl;
mas01cr@0 902
mas01cr@0 903 unsigned dudCount=0;
mas01cr@0 904 unsigned nullCount=0;
mas01cr@0 905 for(unsigned k=0; k<dbH->numFiles; k++){
mas01cr@23 906 if(trackTable[k]<sequenceLength){
mas01cr@0 907 dudCount++;
mas01cr@23 908 if(!trackTable[k])
mas01cr@0 909 nullCount++;
mas01cr@0 910 }
mas01cr@0 911 }
mas01cr@0 912 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
mas01cr@0 913 }
mas01cr@0 914
mas01cr@0 915
mas01cr@0 916 void audioDB::dump(const char* dbName){
mas01cr@0 917 if(!dbH)
mas01cr@0 918 initTables(dbName,0);
mas01cr@0 919
mas01cr@23 920 for(unsigned k=0, j=0; k<dbH->numFiles; k++){
mas01cr@23 921 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
mas01cr@23 922 j+=trackTable[k];
mas01cr@23 923 }
mas01cr@0 924
mas01cr@0 925 status(dbName);
mas01cr@0 926 }
mas01cr@0 927
mas01cr@0 928 void audioDB::l2norm(const char* dbName){
mas01cr@0 929 initTables(dbName,0);
mas01cr@0 930 if(dbH->length>0){
mas01cr@0 931 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@0 932 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 933 }
mas01cr@0 934 // Update database flags
mas01cr@0 935 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 936 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 937 }
mas01cr@0 938
mas01cr@0 939
mas01cr@0 940
mas01cr@0 941 void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 942 switch(queryType){
mas01cr@0 943 case O2_FLAG_POINT_QUERY:
mas01cr@0 944 pointQuery(dbName, inFile, adbQueryResult);
mas01cr@0 945 break;
mas01cr@0 946 case O2_FLAG_SEQUENCE_QUERY:
mas01cr@23 947 if(radius==0)
mas01cr@23 948 trackSequenceQueryNN(dbName, inFile, adbQueryResult);
mas01cr@23 949 else
mas01cr@23 950 trackSequenceQueryRad(dbName, inFile, adbQueryResult);
mas01cr@0 951 break;
mas01cr@23 952 case O2_FLAG_TRACK_QUERY:
mas01cr@23 953 trackPointQuery(dbName, inFile, adbQueryResult);
mas01cr@0 954 break;
mas01cr@0 955 default:
mas01cr@0 956 error("unrecognized queryType in query()");
mas01cr@0 957
mas01cr@0 958 }
mas01cr@0 959 }
mas01cr@0 960
mas01cr@0 961 //return ordinal position of key in keyTable
mas01cr@0 962 unsigned audioDB::getKeyPos(char* key){
mas01cr@0 963 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 964 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@0 965 return k;
mas01cr@0 966 error("Key not found",key);
mas01cr@0 967 return O2_ERR_KEYNOTFOUND;
mas01cr@0 968 }
mas01cr@0 969
mas01cr@0 970 // Basic point query engine
mas01cr@0 971 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 972
mas01cr@0 973 initTables(dbName, inFile);
mas01cr@0 974
mas01cr@0 975 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 976 // we use stdout in this stub version
mas01cr@0 977 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 978
mas01cr@0 979 double* query = (double*)(indata+sizeof(int));
mas01cr@0 980 double* data = dataBuf;
mas01cr@0 981 double* queryCopy = 0;
mas01cr@0 982
mas01cr@0 983 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 984 // Make a copy of the query
mas01cr@0 985 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 986 qNorm = new double[numVectors];
mas01cr@0 987 assert(queryCopy&&qNorm);
mas01cr@0 988 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 989 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 990 query = queryCopy;
mas01cr@0 991 }
mas01cr@0 992
mas01cr@0 993 // Make temporary dynamic memory for results
mas01cr@0 994 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 995 double distances[pointNN];
mas01cr@0 996 unsigned qIndexes[pointNN];
mas01cr@0 997 unsigned sIndexes[pointNN];
mas01cr@0 998 for(unsigned k=0; k<pointNN; k++){
mas01cr@0 999 distances[k]=0.0;
mas01cr@0 1000 qIndexes[k]=~0;
mas01cr@0 1001 sIndexes[k]=~0;
mas01cr@0 1002 }
mas01cr@0 1003
mas01cr@0 1004 unsigned j=numVectors;
mas01cr@0 1005 unsigned k,l,n;
mas01cr@0 1006 double thisDist;
mas01cr@0 1007
mas01cr@0 1008 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
mas01cr@0 1009 double meanQdur = 0;
mas01cr@0 1010 double* timesdata = 0;
mas01cr@0 1011 double* dbdurs = 0;
mas01cr@0 1012
mas01cr@0 1013 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1014 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1015 usingTimes=0;
mas01cr@0 1016 }
mas01cr@0 1017
mas01cr@0 1018 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1019 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1020
mas01cr@0 1021 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1022 timesdata = new double[numVectors];
mas01cr@0 1023 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1024 // Calculate durations of points
mas01cr@0 1025 for(k=0; k<numVectors-1; k++){
mas01cr@0 1026 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1027 meanQdur+=timesdata[k];
mas01cr@0 1028 }
mas01cr@0 1029 meanQdur/=k;
mas01cr@0 1030 // Individual exhaustive timepoint durations
mas01cr@0 1031 dbdurs = new double[totalVecs];
mas01cr@0 1032 for(k=0; k<totalVecs-1; k++)
mas01cr@0 1033 dbdurs[k]=timesTable[k+1]-timesTable[k];
mas01cr@0 1034 j--; // decrement vector counter by one
mas01cr@0 1035 }
mas01cr@0 1036
mas01cr@0 1037 if(usingQueryPoint)
mas01cr@0 1038 if(queryPoint>numVectors-1)
mas01cr@0 1039 error("queryPoint > numVectors in query");
mas01cr@0 1040 else{
mas01cr@0 1041 if(verbosity>1)
mas01cr@0 1042 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1043 query=query+queryPoint*dbH->dim;
mas01cr@0 1044 numVectors=queryPoint+1;
mas01cr@0 1045 j=1;
mas01cr@0 1046 }
mas01cr@0 1047
mas01cr@0 1048 gettimeofday(&tv1, NULL);
mas01cr@0 1049 while(j--){ // query
mas01cr@0 1050 data=dataBuf;
mas01cr@0 1051 k=totalVecs; // number of database vectors
mas01cr@0 1052 while(k--){ // database
mas01cr@0 1053 thisDist=0;
mas01cr@0 1054 l=dbH->dim;
mas01cr@0 1055 double* q=query;
mas01cr@0 1056 while(l--)
mas01cr@0 1057 thisDist+=*q++**data++;
mas01cr@0 1058 if(!usingTimes ||
mas01cr@0 1059 (usingTimes
mas01cr@0 1060 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
mas01cr@0 1061 n=pointNN;
mas01cr@0 1062 while(n--){
mas01cr@0 1063 if(thisDist>=distances[n]){
mas01cr@0 1064 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1065 // Copy all values above up the queue
mas01cr@0 1066 for( l=pointNN-1 ; l >= n+1 ; l--){
mas01cr@0 1067 distances[l]=distances[l-1];
mas01cr@0 1068 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1069 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1070 }
mas01cr@0 1071 distances[n]=thisDist;
mas01cr@0 1072 qIndexes[n]=numVectors-j-1;
mas01cr@0 1073 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
mas01cr@0 1074 break;
mas01cr@0 1075 }
mas01cr@0 1076 }
mas01cr@0 1077 else
mas01cr@0 1078 break;
mas01cr@0 1079 }
mas01cr@0 1080 }
mas01cr@0 1081 }
mas01cr@0 1082 // Move query pointer to next query point
mas01cr@0 1083 query+=dbH->dim;
mas01cr@0 1084 }
mas01cr@0 1085
mas01cr@0 1086 gettimeofday(&tv2, NULL);
mas01cr@0 1087 if(verbosity>1)
mas01cr@0 1088 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@0 1089
mas01cr@0 1090 if(adbQueryResult==0){
mas01cr@0 1091 // Output answer
mas01cr@0 1092 // Loop over nearest neighbours
mas01cr@0 1093 for(k=0; k < pointNN; k++){
mas01cr@0 1094 // Scan for key
mas01cr@23 1095 unsigned cumTrack=0;
mas01cr@0 1096 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@23 1097 cumTrack+=trackTable[l];
mas01cr@23 1098 if(sIndexes[k]<cumTrack){
mas01cr@0 1099 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
mas01cr@23 1100 << sIndexes[k]+trackTable[l]-cumTrack << endl;
mas01cr@0 1101 break;
mas01cr@0 1102 }
mas01cr@0 1103 }
mas01cr@0 1104 }
mas01cr@0 1105 }
mas01cr@0 1106 else{ // Process Web Services Query
mas01cr@0 1107 int listLen = pointNN;
mas01cr@0 1108 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1109 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1110 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1111 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1112 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1113 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1114 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1115 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1116 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1117 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@0 1118 adbQueryResult->Dist[k]=distances[k];
mas01cr@0 1119 adbQueryResult->Qpos[k]=qIndexes[k];
mas01cr@23 1120 unsigned cumTrack=0;
mas01cr@0 1121 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@23 1122 cumTrack+=trackTable[l];
mas01cr@23 1123 if(sIndexes[k]<cumTrack){
mas01cr@0 1124 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
mas01cr@0 1125 break;
mas01cr@0 1126 }
mas01cr@0 1127 }
mas01cr@23 1128 adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
mas01cr@0 1129 }
mas01cr@0 1130 }
mas01cr@0 1131
mas01cr@0 1132 // Clean up
mas01cr@0 1133 if(queryCopy)
mas01cr@0 1134 delete queryCopy;
mas01cr@0 1135 if(qNorm)
mas01cr@0 1136 delete qNorm;
mas01cr@0 1137 if(timesdata)
mas01cr@0 1138 delete timesdata;
mas01cr@0 1139 if(dbdurs)
mas01cr@0 1140 delete dbdurs;
mas01cr@0 1141 }
mas01cr@0 1142
mas01cr@23 1143 // trackPointQuery
mas01cr@23 1144 // return the trackNN closest tracks to the query track
mas01cr@23 1145 // uses average of pointNN points per track
mas01cr@23 1146 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 1147 initTables(dbName, inFile);
mas01cr@0 1148
mas01cr@0 1149 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1150 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@23 1151 unsigned numTracks = dbH->numFiles;
mas01cr@0 1152
mas01cr@0 1153 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1154 double* data = dataBuf;
mas01cr@0 1155 double* queryCopy = 0;
mas01cr@0 1156
mas01cr@0 1157 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1158 // Make a copy of the query
mas01cr@0 1159 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1160 qNorm = new double[numVectors];
mas01cr@0 1161 assert(queryCopy&&qNorm);
mas01cr@0 1162 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1163 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1164 query = queryCopy;
mas01cr@0 1165 }
mas01cr@0 1166
mas01cr@0 1167 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@23 1168 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1169
mas01cr@0 1170 // Make temporary dynamic memory for results
mas01cr@23 1171 double trackDistances[trackNN];
mas01cr@23 1172 unsigned trackIDs[trackNN];
mas01cr@23 1173 unsigned trackQIndexes[trackNN];
mas01cr@23 1174 unsigned trackSIndexes[trackNN];
mas01cr@0 1175
mas01cr@0 1176 double distances[pointNN];
mas01cr@0 1177 unsigned qIndexes[pointNN];
mas01cr@0 1178 unsigned sIndexes[pointNN];
mas01cr@0 1179
mas01cr@0 1180 unsigned j=numVectors; // number of query points
mas01cr@23 1181 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
mas01cr@0 1182 double thisDist;
mas01cr@0 1183
mas01cr@0 1184 for(k=0; k<pointNN; k++){
mas01cr@0 1185 distances[k]=0.0;
mas01cr@0 1186 qIndexes[k]=~0;
mas01cr@0 1187 sIndexes[k]=~0;
mas01cr@0 1188 }
mas01cr@0 1189
mas01cr@23 1190 for(k=0; k<trackNN; k++){
mas01cr@23 1191 trackDistances[k]=0.0;
mas01cr@23 1192 trackQIndexes[k]=~0;
mas01cr@23 1193 trackSIndexes[k]=~0;
mas01cr@23 1194 trackIDs[k]=~0;
mas01cr@0 1195 }
mas01cr@0 1196
mas01cr@0 1197 double meanQdur = 0;
mas01cr@0 1198 double* timesdata = 0;
mas01cr@0 1199 double* meanDBdur = 0;
mas01cr@0 1200
mas01cr@0 1201 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1202 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1203 usingTimes=0;
mas01cr@0 1204 }
mas01cr@0 1205
mas01cr@0 1206 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1207 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1208
mas01cr@0 1209 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1210 timesdata = new double[numVectors];
mas01cr@0 1211 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1212 // Calculate durations of points
mas01cr@0 1213 for(k=0; k<numVectors-1; k++){
mas01cr@0 1214 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1215 meanQdur+=timesdata[k];
mas01cr@0 1216 }
mas01cr@0 1217 meanQdur/=k;
mas01cr@0 1218 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1219 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1220 meanDBdur[k]=0.0;
mas01cr@23 1221 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1222 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1223 meanDBdur[k]/=j;
mas01cr@0 1224 }
mas01cr@0 1225 }
mas01cr@0 1226
mas01cr@0 1227 if(usingQueryPoint)
mas01cr@0 1228 if(queryPoint>numVectors-1)
mas01cr@0 1229 error("queryPoint > numVectors in query");
mas01cr@0 1230 else{
mas01cr@0 1231 if(verbosity>1)
mas01cr@0 1232 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1233 query=query+queryPoint*dbH->dim;
mas01cr@0 1234 numVectors=queryPoint+1;
mas01cr@0 1235 }
mas01cr@0 1236
mas01cr@23 1237 // build track offset table
mas01cr@23 1238 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01cr@23 1239 unsigned cumTrack=0;
mas01cr@23 1240 unsigned trackIndexOffset;
mas01cr@0 1241 for(k=0; k<dbH->numFiles;k++){
mas01cr@23 1242 trackOffsetTable[k]=cumTrack;
mas01cr@23 1243 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1244 }
mas01cr@0 1245
mas01cr@0 1246 char nextKey[MAXSTR];
mas01cr@0 1247
mas01cr@0 1248 gettimeofday(&tv1, NULL);
mas01cr@0 1249
mas01cr@23 1250 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@23 1251 if(trackFile){
mas01cr@23 1252 if(!trackFile->eof()){
mas01cr@23 1253 trackFile->getline(nextKey,MAXSTR);
mas01cr@23 1254 track=getKeyPos(nextKey);
mas01cr@0 1255 }
mas01cr@0 1256 else
mas01cr@0 1257 break;
mas01cr@0 1258 }
mas01cr@23 1259 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01cr@23 1260 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@0 1261 if(verbosity>7)
mas01cr@23 1262 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@0 1263
mas01cr@0 1264 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 1265 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
mas01cr@0 1266 else
mas01cr@0 1267 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
mas01cr@0 1268 if(usingQueryPoint)
mas01cr@0 1269 j=1;
mas01cr@0 1270 else
mas01cr@0 1271 j=numVectors;
mas01cr@0 1272 while(j--){
mas01cr@23 1273 k=trackTable[track]; // number of vectors in track
mas01cr@23 1274 data=dataBuf+trackOffset; // data for track
mas01cr@0 1275 while(k--){
mas01cr@0 1276 thisDist=0;
mas01cr@0 1277 l=dbH->dim;
mas01cr@0 1278 double* q=query;
mas01cr@0 1279 while(l--)
mas01cr@0 1280 thisDist+=*q++**data++;
mas01cr@0 1281 if(!usingTimes ||
mas01cr@0 1282 (usingTimes
mas01cr@23 1283 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1284 n=pointNN;
mas01cr@0 1285 while(n--){
mas01cr@0 1286 if(thisDist>=distances[n]){
mas01cr@0 1287 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1288 // Copy all values above up the queue
mas01cr@0 1289 for( l=pointNN-1 ; l > n ; l--){
mas01cr@0 1290 distances[l]=distances[l-1];
mas01cr@0 1291 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1292 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1293 }
mas01cr@0 1294 distances[n]=thisDist;
mas01cr@0 1295 qIndexes[n]=numVectors-j-1;
mas01cr@23 1296 sIndexes[n]=trackTable[track]-k-1;
mas01cr@0 1297 break;
mas01cr@0 1298 }
mas01cr@0 1299 }
mas01cr@0 1300 else
mas01cr@0 1301 break;
mas01cr@0 1302 }
mas01cr@0 1303 }
mas01cr@23 1304 } // track
mas01cr@0 1305 // Move query pointer to next query point
mas01cr@0 1306 query+=dbH->dim;
mas01cr@0 1307 } // query
mas01cr@23 1308 // Take the average of this track's distance
mas01cr@23 1309 // Test the track distances
mas01cr@0 1310 thisDist=0;
mas01cr@0 1311 n=pointNN;
mas01cr@0 1312 while(n--)
mas01cr@0 1313 thisDist+=distances[pointNN-n-1];
mas01cr@0 1314 thisDist/=pointNN;
mas01cr@23 1315 n=trackNN;
mas01cr@0 1316 while(n--){
mas01cr@23 1317 if(thisDist>=trackDistances[n]){
mas01cr@23 1318 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@0 1319 // Copy all values above up the queue
mas01cr@0 1320 for( l=pointNN-1 ; l > n ; l--){
mas01cr@23 1321 trackDistances[l]=trackDistances[l-1];
mas01cr@23 1322 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@23 1323 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@23 1324 trackIDs[l]=trackIDs[l-1];
mas01cr@0 1325 }
mas01cr@23 1326 trackDistances[n]=thisDist;
mas01cr@23 1327 trackQIndexes[n]=qIndexes[0];
mas01cr@23 1328 trackSIndexes[n]=sIndexes[0];
mas01cr@23 1329 trackIDs[n]=track;
mas01cr@0 1330 break;
mas01cr@0 1331 }
mas01cr@0 1332 }
mas01cr@0 1333 else
mas01cr@0 1334 break;
mas01cr@0 1335 }
mas01cr@0 1336 for(unsigned k=0; k<pointNN; k++){
mas01cr@0 1337 distances[k]=0.0;
mas01cr@0 1338 qIndexes[k]=~0;
mas01cr@0 1339 sIndexes[k]=~0;
mas01cr@0 1340 }
mas01cr@23 1341 } // tracks
mas01cr@0 1342 gettimeofday(&tv2, NULL);
mas01cr@0 1343
mas01cr@0 1344 if(verbosity>1)
mas01cr@23 1345 cerr << endl << "processed tracks :" << processedTracks
mas01cr@0 1346 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@0 1347
mas01cr@0 1348 if(adbQueryResult==0){
mas01cr@0 1349 if(verbosity>1)
mas01cr@0 1350 cerr<<endl;
mas01cr@0 1351 // Output answer
mas01cr@0 1352 // Loop over nearest neighbours
mas01cr@23 1353 for(k=0; k < min(trackNN,processedTracks); k++)
mas01cr@23 1354 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
mas01cr@23 1355 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 1356 }
mas01cr@0 1357 else{ // Process Web Services Query
mas01cr@23 1358 int listLen = min(trackNN, processedTracks);
mas01cr@0 1359 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1360 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1361 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1362 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1363 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1364 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1365 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1366 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1367 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1368 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@23 1369 adbQueryResult->Dist[k]=trackDistances[k];
mas01cr@23 1370 adbQueryResult->Qpos[k]=trackQIndexes[k];
mas01cr@23 1371 adbQueryResult->Spos[k]=trackSIndexes[k];
mas01cr@23 1372 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1373 }
mas01cr@0 1374 }
mas01cr@0 1375
mas01cr@0 1376
mas01cr@0 1377 // Clean up
mas01cr@23 1378 if(trackOffsetTable)
mas01cr@23 1379 delete trackOffsetTable;
mas01cr@0 1380 if(queryCopy)
mas01cr@0 1381 delete queryCopy;
mas01cr@0 1382 if(qNorm)
mas01cr@0 1383 delete qNorm;
mas01cr@0 1384 if(timesdata)
mas01cr@0 1385 delete timesdata;
mas01cr@0 1386 if(meanDBdur)
mas01cr@0 1387 delete meanDBdur;
mas01cr@0 1388
mas01cr@0 1389 }
mas01cr@0 1390
mas01cr@0 1391
mas01cr@23 1392 // k nearest-neighbor (k-NN) search between query and target tracks
mas01cr@23 1393 // efficient implementation based on matched filter
mas01cr@23 1394 // assumes normed shingles
mas01cr@23 1395 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
mas01cr@23 1396 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@0 1397
mas01cr@0 1398 initTables(dbName, inFile);
mas01cr@0 1399
mas01cr@0 1400 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1401 // we use stdout in this stub version
mas01cr@0 1402 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@23 1403 unsigned numTracks = dbH->numFiles;
mas01cr@0 1404
mas01cr@0 1405 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1406 double* data = dataBuf;
mas01cr@0 1407 double* queryCopy = 0;
mas01cr@0 1408
mas01cr@0 1409 double qMeanL2;
mas01cr@0 1410 double* sMeanL2;
mas01cr@0 1411
mas01cr@0 1412 unsigned USE_THRESH=0;
mas01cr@0 1413 double SILENCE_THRESH=0;
mas01cr@0 1414 double DIFF_THRESH=0;
mas01cr@0 1415
mas01cr@0 1416 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 1417 error("Database must be L2 normed for sequence query","use -l2norm");
mas01cr@0 1418
mas01cr@0 1419 if(verbosity>1)
mas01cr@0 1420 cerr << "performing norms ... "; cerr.flush();
mas01cr@0 1421 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@23 1422
mas01cr@0 1423 // Make a copy of the query
mas01cr@0 1424 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1425 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1426 qNorm = new double[numVectors];
mas01cr@0 1427 sNorm = new double[dbVectors];
mas01cr@0 1428 sMeanL2=new double[dbH->numFiles];
mas01cr@0 1429 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
mas01cr@0 1430 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1431 query = queryCopy;
mas01cr@23 1432
mas01cr@0 1433 // Make norm measurements relative to sequenceLength
mas01cr@0 1434 unsigned w = sequenceLength-1;
mas01cr@0 1435 unsigned i,j;
mas01cr@0 1436 double* ps;
mas01cr@0 1437 double tmp1,tmp2;
mas01cr@23 1438
mas01cr@0 1439 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@0 1440 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@0 1441 double* snPtr = sNorm;
mas01cr@0 1442 for(i=0; i<dbH->numFiles; i++){
mas01cr@23 1443 if(trackTable[i]>=sequenceLength){
mas01cr@0 1444 tmp1=*snPtr;
mas01cr@0 1445 j=1;
mas01cr@0 1446 w=sequenceLength-1;
mas01cr@0 1447 while(w--)
mas01cr@0 1448 *snPtr+=snPtr[j++];
mas01cr@0 1449 ps = snPtr+1;
mas01cr@23 1450 w=trackTable[i]-sequenceLength; // +1 - 1
mas01cr@0 1451 while(w--){
mas01cr@0 1452 tmp2=*ps;
mas01cr@23 1453 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@0 1454 tmp1=tmp2;
mas01cr@0 1455 ps++;
mas01cr@0 1456 }
mas01cr@23 1457 ps = snPtr;
mas01cr@23 1458 w=trackTable[i]-sequenceLength+1;
mas01cr@23 1459 while(w--){
mas01cr@23 1460 *ps=sqrt(*ps);
mas01cr@23 1461 ps++;
mas01cr@23 1462 }
mas01cr@0 1463 }
mas01cr@23 1464 snPtr+=trackTable[i];
mas01cr@0 1465 }
mas01cr@0 1466
mas01cr@0 1467 double* pn = sMeanL2;
mas01cr@0 1468 w=dbH->numFiles;
mas01cr@0 1469 while(w--)
mas01cr@0 1470 *pn++=0.0;
mas01cr@0 1471 ps=sNorm;
mas01cr@23 1472 unsigned processedTracks=0;
mas01cr@0 1473 for(i=0; i<dbH->numFiles; i++){
mas01cr@23 1474 if(trackTable[i]>sequenceLength-1){
mas01cr@23 1475 w = trackTable[i]-sequenceLength;
mas01cr@0 1476 pn = sMeanL2+i;
mas01cr@23 1477 *pn=0;
mas01cr@0 1478 while(w--)
mas01cr@23 1479 if(*ps>0)
mas01cr@23 1480 *pn+=*ps++;
mas01cr@23 1481 *pn/=trackTable[i]-sequenceLength;
mas01cr@0 1482 SILENCE_THRESH+=*pn;
mas01cr@23 1483 processedTracks++;
mas01cr@0 1484 }
mas01cr@23 1485 ps = sNorm + trackTable[i];
mas01cr@0 1486 }
mas01cr@0 1487 if(verbosity>1)
mas01cr@23 1488 cerr << "processedTracks: " << processedTracks << endl;
mas01cr@23 1489
mas01cr@23 1490
mas01cr@23 1491 SILENCE_THRESH/=processedTracks;
mas01cr@0 1492 USE_THRESH=1; // Turn thresholding on
mas01cr@23 1493 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
mas01cr@23 1494 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
mas01cr@23 1495 if(verbosity>4)
mas01cr@23 1496 cerr << "silence thresh: " << SILENCE_THRESH;
mas01cr@0 1497 w=sequenceLength-1;
mas01cr@0 1498 i=1;
mas01cr@0 1499 tmp1=*qNorm;
mas01cr@0 1500 while(w--)
mas01cr@0 1501 *qNorm+=qNorm[i++];
mas01cr@0 1502 ps = qNorm+1;
mas01cr@23 1503 w=numVectors-sequenceLength; // +1 -1
mas01cr@0 1504 while(w--){
mas01cr@0 1505 tmp2=*ps;
mas01cr@23 1506 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@0 1507 tmp1=tmp2;
mas01cr@23 1508 ps++;
mas01cr@23 1509 }
mas01cr@23 1510 ps = qNorm;
mas01cr@23 1511 qMeanL2 = 0;
mas01cr@23 1512 w=numVectors-sequenceLength+1;
mas01cr@23 1513 while(w--){
mas01cr@23 1514 *ps=sqrt(*ps);
mas01cr@23 1515 qMeanL2+=*ps++;
mas01cr@0 1516 }
mas01cr@0 1517 qMeanL2 /= numVectors-sequenceLength+1;
mas01cr@23 1518
mas01cr@0 1519 if(verbosity>1)
mas01cr@0 1520 cerr << "done." << endl;
mas01cr@0 1521
mas01cr@0 1522
mas01cr@0 1523 if(verbosity>1)
mas01cr@23 1524 cerr << "matching tracks..." << endl;
mas01cr@0 1525
mas01cr@0 1526 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@23 1527 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1528
mas01cr@0 1529 // Make temporary dynamic memory for results
mas01cr@23 1530 double trackDistances[trackNN];
mas01cr@23 1531 unsigned trackIDs[trackNN];
mas01cr@23 1532 unsigned trackQIndexes[trackNN];
mas01cr@23 1533 unsigned trackSIndexes[trackNN];
mas01cr@0 1534
mas01cr@0 1535 double distances[pointNN];
mas01cr@0 1536 unsigned qIndexes[pointNN];
mas01cr@0 1537 unsigned sIndexes[pointNN];
mas01cr@0 1538
mas01cr@0 1539
mas01cr@23 1540 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@0 1541 double thisDist;
mas01cr@0 1542 double oneOverWL=1.0/wL;
mas01cr@0 1543
mas01cr@0 1544 for(k=0; k<pointNN; k++){
mas01cr@23 1545 distances[k]=1.0e6;
mas01cr@0 1546 qIndexes[k]=~0;
mas01cr@0 1547 sIndexes[k]=~0;
mas01cr@0 1548 }
mas01cr@0 1549
mas01cr@23 1550 for(k=0; k<trackNN; k++){
mas01cr@23 1551 trackDistances[k]=1.0e6;
mas01cr@23 1552 trackQIndexes[k]=~0;
mas01cr@23 1553 trackSIndexes[k]=~0;
mas01cr@23 1554 trackIDs[k]=~0;
mas01cr@0 1555 }
mas01cr@0 1556
mas01cr@0 1557 // Timestamp and durations processing
mas01cr@0 1558 double meanQdur = 0;
mas01cr@0 1559 double* timesdata = 0;
mas01cr@0 1560 double* meanDBdur = 0;
mas01cr@0 1561
mas01cr@0 1562 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1563 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1564 usingTimes=0;
mas01cr@0 1565 }
mas01cr@0 1566
mas01cr@0 1567 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1568 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1569
mas01cr@0 1570 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1571 timesdata = new double[numVectors];
mas01cr@0 1572 assert(timesdata);
mas01cr@0 1573 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1574 // Calculate durations of points
mas01cr@0 1575 for(k=0; k<numVectors-1; k++){
mas01cr@0 1576 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1577 meanQdur+=timesdata[k];
mas01cr@0 1578 }
mas01cr@0 1579 meanQdur/=k;
mas01cr@0 1580 if(verbosity>1)
mas01cr@0 1581 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@0 1582 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1583 assert(meanDBdur);
mas01cr@0 1584 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1585 meanDBdur[k]=0.0;
mas01cr@23 1586 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1587 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1588 meanDBdur[k]/=j;
mas01cr@0 1589 }
mas01cr@0 1590 }
mas01cr@0 1591
mas01cr@0 1592 if(usingQueryPoint)
mas01cr@0 1593 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@0 1594 error("queryPoint > numVectors-wL+1 in query");
mas01cr@0 1595 else{
mas01cr@0 1596 if(verbosity>1)
mas01cr@0 1597 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@0 1598 query=query+queryPoint*dbH->dim;
mas01cr@0 1599 qNorm=qNorm+queryPoint;
mas01cr@0 1600 numVectors=wL;
mas01cr@0 1601 }
mas01cr@0 1602
mas01cr@23 1603 double ** D = 0; // Differences query and target
mas01cr@0 1604 double ** DD = 0; // Matched filter distance
mas01cr@0 1605
mas01cr@0 1606 D = new double*[numVectors];
mas01cr@0 1607 assert(D);
mas01cr@0 1608 DD = new double*[numVectors];
mas01cr@0 1609 assert(DD);
mas01cr@0 1610
mas01cr@0 1611 gettimeofday(&tv1, NULL);
mas01cr@23 1612 processedTracks=0;
mas01cr@23 1613 unsigned successfulTracks=0;
mas01cr@0 1614
mas01cr@0 1615 double* qp;
mas01cr@0 1616 double* sp;
mas01cr@0 1617 double* dp;
mas01cr@0 1618 double diffL2;
mas01cr@0 1619
mas01cr@23 1620 // build track offset table
mas01cr@23 1621 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01cr@23 1622 unsigned cumTrack=0;
mas01cr@23 1623 unsigned trackIndexOffset;
mas01cr@0 1624 for(k=0; k<dbH->numFiles;k++){
mas01cr@23 1625 trackOffsetTable[k]=cumTrack;
mas01cr@23 1626 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1627 }
mas01cr@0 1628
mas01cr@0 1629 char nextKey [MAXSTR];
mas01cr@0 1630
mas01cr@23 1631 // chi^2 statistics
mas01cr@23 1632 double sampleCount = 0;
mas01cr@23 1633 double sampleSum = 0;
mas01cr@23 1634 double logSampleSum = 0;
mas01cr@23 1635 double minSample = 1e9;
mas01cr@23 1636 double maxSample = 0;
mas01cr@23 1637
mas01cr@23 1638 // Track loop
mas01cr@23 1639 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@23 1640
mas01cr@23 1641 // get trackID from file if using a control file
mas01cr@23 1642 if(trackFile){
mas01cr@23 1643 if(!trackFile->eof()){
mas01cr@23 1644 trackFile->getline(nextKey,MAXSTR);
mas01cr@23 1645 track=getKeyPos(nextKey);
mas01cr@0 1646 }
mas01cr@0 1647 else
mas01cr@0 1648 break;
mas01cr@0 1649 }
mas01cr@15 1650
mas01cr@23 1651 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01cr@23 1652 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@0 1653
mas01cr@23 1654 if(sequenceLength<trackTable[track]){ // test for short sequences
mas01cr@0 1655
mas01cr@0 1656 if(verbosity>7)
mas01cr@23 1657 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@0 1658
mas01cr@23 1659 // Sum products matrix
mas01cr@0 1660 for(j=0; j<numVectors;j++){
mas01cr@23 1661 D[j]=new double[trackTable[track]];
mas01cr@0 1662 assert(D[j]);
mas01cr@0 1663
mas01cr@0 1664 }
mas01cr@0 1665
mas01cr@0 1666 // Matched filter matrix
mas01cr@0 1667 for(j=0; j<numVectors;j++){
mas01cr@23 1668 DD[j]=new double[trackTable[track]];
mas01cr@0 1669 assert(DD[j]);
mas01cr@0 1670 }
mas01cr@0 1671
mas01cr@23 1672 double tmp;
mas01cr@23 1673 // Dot product
mas01cr@0 1674 for(j=0; j<numVectors; j++)
mas01cr@23 1675 for(k=0; k<trackTable[track]; k++){
mas01cr@0 1676 qp=query+j*dbH->dim;
mas01cr@23 1677 sp=dataBuf+trackOffset+k*dbH->dim;
mas01cr@0 1678 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@0 1679 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@0 1680 *dp=0.0; // initialize correlation cell
mas01cr@0 1681 l=dbH->dim; // size of vectors
mas01cr@0 1682 while(l--)
mas01cr@0 1683 *dp+=*qp++**sp++;
mas01cr@0 1684 }
mas01cr@0 1685
mas01cr@0 1686 // Matched Filter
mas01cr@0 1687 // HOP SIZE == 1
mas01cr@0 1688 double* spd;
mas01cr@0 1689 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@0 1690 for(w=0; w<wL; w++)
mas01cr@0 1691 for(j=0; j<numVectors-w; j++){
mas01cr@0 1692 sp=DD[j];
mas01cr@0 1693 spd=D[j+w]+w;
mas01cr@23 1694 k=trackTable[track]-w;
mas01cr@0 1695 while(k--)
mas01cr@0 1696 *sp+++=*spd++;
mas01cr@0 1697 }
mas01cr@0 1698 }
mas01cr@23 1699
mas01cr@0 1700 else{ // HOP_SIZE != 1
mas01cr@0 1701 for(w=0; w<wL; w++)
mas01cr@0 1702 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@0 1703 sp=DD[j];
mas01cr@0 1704 spd=D[j+w]+w;
mas01cr@23 1705 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@0 1706 *sp+=*spd;
mas01cr@0 1707 sp+=HOP_SIZE;
mas01cr@0 1708 spd+=HOP_SIZE;
mas01cr@0 1709 }
mas01cr@0 1710 }
mas01cr@0 1711 }
mas01cr@0 1712
mas01cr@15 1713 if(verbosity>3 && usingTimes){
mas01cr@23 1714 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01cr@0 1715 cerr.flush();
mas01cr@0 1716 }
mas01cr@0 1717
mas01cr@0 1718 if(!usingTimes ||
mas01cr@0 1719 (usingTimes
mas01cr@23 1720 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1721
mas01cr@15 1722 if(verbosity>3 && usingTimes){
mas01cr@0 1723 cerr << "within duration tolerance." << endl;
mas01cr@0 1724 cerr.flush();
mas01cr@0 1725 }
mas01cr@0 1726
mas01cr@0 1727 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@23 1728 for(j=0;j<numVectors-wL;j+=HOP_SIZE)
mas01cr@23 1729 for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@23 1730 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@23 1731 if(verbosity>10)
mas01cr@23 1732 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@23 1733 // Gather chi^2 statistics
mas01cr@23 1734 if(thisDist<minSample)
mas01cr@23 1735 minSample=thisDist;
mas01cr@23 1736 else if(thisDist>maxSample)
mas01cr@23 1737 maxSample=thisDist;
mas01cr@23 1738 if(thisDist>1e-9){
mas01cr@23 1739 sampleCount++;
mas01cr@23 1740 sampleSum+=thisDist;
mas01cr@23 1741 logSampleSum+=log(thisDist);
mas01cr@23 1742 }
mas01cr@23 1743
mas01cr@23 1744 // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
mas01cr@0 1745 // Power test
mas01cr@0 1746 if(!USE_THRESH ||
mas01cr@0 1747 // Threshold on mean L2 of Q and S sequences
mas01cr@23 1748 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
mas01cr@0 1749 // Are both query and target windows above mean energy?
mas01cr@23 1750 (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH )))
mas01cr@23 1751 thisDist=thisDist; // Computed above
mas01cr@0 1752 else
mas01cr@23 1753 thisDist=1000000.0;
mas01cr@23 1754
mas01cr@23 1755 // k-NN match algorithm
mas01cr@23 1756 m=pointNN;
mas01cr@23 1757 while(m--){
mas01cr@23 1758 if(thisDist<=distances[m])
mas01cr@23 1759 if(m==0 || thisDist>=distances[m-1]){
mas01cr@0 1760 // Shuffle distances up the list
mas01cr@0 1761 for(l=pointNN-1; l>m; l--){
mas01cr@0 1762 distances[l]=distances[l-1];
mas01cr@0 1763 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1764 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1765 }
mas01cr@0 1766 distances[m]=thisDist;
mas01cr@0 1767 if(usingQueryPoint)
mas01cr@0 1768 qIndexes[m]=queryPoint;
mas01cr@0 1769 else
mas01cr@0 1770 qIndexes[m]=j;
mas01cr@0 1771 sIndexes[m]=k;
mas01cr@0 1772 break;
mas01cr@23 1773 }
mas01cr@0 1774 }
mas01cr@0 1775 }
mas01cr@0 1776 // Calculate the mean of the N-Best matches
mas01cr@0 1777 thisDist=0.0;
mas01cr@0 1778 for(m=0; m<pointNN; m++)
mas01cr@0 1779 thisDist+=distances[m];
mas01cr@0 1780 thisDist/=pointNN;
mas01cr@0 1781
mas01cr@15 1782 // Let's see the distances then...
mas01cr@15 1783 if(verbosity>3)
mas01cr@23 1784 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@15 1785
mas01cr@23 1786
mas01cr@23 1787 // All the track stuff goes here
mas01cr@23 1788 n=trackNN;
mas01cr@0 1789 while(n--){
mas01cr@23 1790 if(thisDist<=trackDistances[n]){
mas01cr@23 1791 if((n==0 || thisDist>=trackDistances[n-1])){
mas01cr@0 1792 // Copy all values above up the queue
mas01cr@23 1793 for( l=trackNN-1 ; l > n ; l--){
mas01cr@23 1794 trackDistances[l]=trackDistances[l-1];
mas01cr@23 1795 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@23 1796 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@23 1797 trackIDs[l]=trackIDs[l-1];
mas01cr@0 1798 }
mas01cr@23 1799 trackDistances[n]=thisDist;
mas01cr@23 1800 trackQIndexes[n]=qIndexes[0];
mas01cr@23 1801 trackSIndexes[n]=sIndexes[0];
mas01cr@23 1802 successfulTracks++;
mas01cr@23 1803 trackIDs[n]=track;
mas01cr@0 1804 break;
mas01cr@0 1805 }
mas01cr@0 1806 }
mas01cr@0 1807 else
mas01cr@0 1808 break;
mas01cr@0 1809 }
mas01cr@0 1810 } // Duration match
mas01cr@23 1811
mas01cr@23 1812 // Clean up current track
mas01cr@0 1813 if(D!=NULL){
mas01cr@0 1814 for(j=0; j<numVectors; j++)
mas01cr@0 1815 delete[] D[j];
mas01cr@0 1816 }
mas01cr@0 1817
mas01cr@0 1818 if(DD!=NULL){
mas01cr@0 1819 for(j=0; j<numVectors; j++)
mas01cr@0 1820 delete[] DD[j];
mas01cr@0 1821 }
mas01cr@0 1822 }
mas01cr@23 1823 // per-track reset array values
mas01cr@23 1824 for(unsigned k=0; k<pointNN; k++){
mas01cr@23 1825 distances[k]=1.0e6;
mas01cr@23 1826 qIndexes[k]=~0;
mas01cr@23 1827 sIndexes[k]=~0;
mas01cr@23 1828 }
mas01cr@0 1829 }
mas01cr@0 1830
mas01cr@0 1831 gettimeofday(&tv2,NULL);
mas01cr@23 1832 if(verbosity>1){
mas01cr@23 1833 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@0 1834 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@23 1835 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@23 1836 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01cr@23 1837 }
mas01cr@0 1838 if(adbQueryResult==0){
mas01cr@0 1839 if(verbosity>1)
mas01cr@0 1840 cerr<<endl;
mas01cr@0 1841 // Output answer
mas01cr@0 1842 // Loop over nearest neighbours
mas01cr@23 1843 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01cr@23 1844 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
mas01cr@23 1845 << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 1846 }
mas01cr@0 1847 else{ // Process Web Services Query
mas01cr@23 1848 int listLen = min(trackNN, processedTracks);
mas01cr@0 1849 adbQueryResult->__sizeRlist=listLen;
mas01cr@0 1850 adbQueryResult->__sizeDist=listLen;
mas01cr@0 1851 adbQueryResult->__sizeQpos=listLen;
mas01cr@0 1852 adbQueryResult->__sizeSpos=listLen;
mas01cr@0 1853 adbQueryResult->Rlist= new char*[listLen];
mas01cr@0 1854 adbQueryResult->Dist = new double[listLen];
mas01cr@0 1855 adbQueryResult->Qpos = new int[listLen];
mas01cr@0 1856 adbQueryResult->Spos = new int[listLen];
mas01cr@0 1857 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@0 1858 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@23 1859 adbQueryResult->Dist[k]=trackDistances[k];
mas01cr@23 1860 adbQueryResult->Qpos[k]=trackQIndexes[k];
mas01cr@23 1861 adbQueryResult->Spos[k]=trackSIndexes[k];
mas01cr@23 1862 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1863 }
mas01cr@0 1864 }
mas01cr@0 1865
mas01cr@0 1866
mas01cr@0 1867 // Clean up
mas01cr@23 1868 if(trackOffsetTable)
mas01cr@23 1869 delete[] trackOffsetTable;
mas01cr@0 1870 if(queryCopy)
mas01cr@23 1871 delete[] queryCopy;
mas01cr@0 1872 //if(qNorm)
mas01cr@0 1873 //delete qNorm;
mas01cr@0 1874 if(D)
mas01cr@0 1875 delete[] D;
mas01cr@0 1876 if(DD)
mas01cr@0 1877 delete[] DD;
mas01cr@0 1878 if(timesdata)
mas01cr@23 1879 delete[] timesdata;
mas01cr@0 1880 if(meanDBdur)
mas01cr@23 1881 delete[] meanDBdur;
mas01cr@23 1882
mas01cr@23 1883
mas01cr@23 1884 }
mas01cr@23 1885
mas01cr@23 1886 // Radius search between query and target tracks
mas01cr@23 1887 // efficient implementation based on matched filter
mas01cr@23 1888 // assumes normed shingles
mas01cr@23 1889 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
mas01cr@23 1890 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
mas01cr@23 1891
mas01cr@23 1892 initTables(dbName, inFile);
mas01cr@23 1893
mas01cr@23 1894 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@23 1895 // we use stdout in this stub version
mas01cr@23 1896 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@23 1897 unsigned numTracks = dbH->numFiles;
mas01cr@23 1898
mas01cr@23 1899 double* query = (double*)(indata+sizeof(int));
mas01cr@23 1900 double* data = dataBuf;
mas01cr@23 1901 double* queryCopy = 0;
mas01cr@23 1902
mas01cr@23 1903 double qMeanL2;
mas01cr@23 1904 double* sMeanL2;
mas01cr@23 1905
mas01cr@23 1906 unsigned USE_THRESH=0;
mas01cr@23 1907 double SILENCE_THRESH=0;
mas01cr@23 1908 double DIFF_THRESH=0;
mas01cr@23 1909
mas01cr@23 1910 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@23 1911 error("Database must be L2 normed for sequence query","use -l2norm");
mas01cr@23 1912
mas01cr@23 1913 if(verbosity>1)
mas01cr@23 1914 cerr << "performing norms ... "; cerr.flush();
mas01cr@23 1915 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@23 1916
mas01cr@23 1917 // Make a copy of the query
mas01cr@23 1918 queryCopy = new double[numVectors*dbH->dim];
mas01cr@23 1919 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@23 1920 qNorm = new double[numVectors];
mas01cr@23 1921 sNorm = new double[dbVectors];
mas01cr@23 1922 sMeanL2=new double[dbH->numFiles];
mas01cr@23 1923 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
mas01cr@23 1924 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@23 1925 query = queryCopy;
mas01cr@23 1926
mas01cr@23 1927 // Make norm measurements relative to sequenceLength
mas01cr@23 1928 unsigned w = sequenceLength-1;
mas01cr@23 1929 unsigned i,j;
mas01cr@23 1930 double* ps;
mas01cr@23 1931 double tmp1,tmp2;
mas01cr@23 1932
mas01cr@23 1933 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@23 1934 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@23 1935 double* snPtr = sNorm;
mas01cr@23 1936 for(i=0; i<dbH->numFiles; i++){
mas01cr@23 1937 if(trackTable[i]>=sequenceLength){
mas01cr@23 1938 tmp1=*snPtr;
mas01cr@23 1939 j=1;
mas01cr@23 1940 w=sequenceLength-1;
mas01cr@23 1941 while(w--)
mas01cr@23 1942 *snPtr+=snPtr[j++];
mas01cr@23 1943 ps = snPtr+1;
mas01cr@23 1944 w=trackTable[i]-sequenceLength; // +1 - 1
mas01cr@23 1945 while(w--){
mas01cr@23 1946 tmp2=*ps;
mas01cr@23 1947 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@23 1948 tmp1=tmp2;
mas01cr@23 1949 ps++;
mas01cr@23 1950 }
mas01cr@23 1951 ps = snPtr;
mas01cr@23 1952 w=trackTable[i]-sequenceLength+1;
mas01cr@23 1953 while(w--){
mas01cr@23 1954 *ps=sqrt(*ps);
mas01cr@23 1955 ps++;
mas01cr@23 1956 }
mas01cr@23 1957 }
mas01cr@23 1958 snPtr+=trackTable[i];
mas01cr@23 1959 }
mas01cr@23 1960
mas01cr@23 1961 double* pn = sMeanL2;
mas01cr@23 1962 w=dbH->numFiles;
mas01cr@23 1963 while(w--)
mas01cr@23 1964 *pn++=0.0;
mas01cr@23 1965 ps=sNorm;
mas01cr@23 1966 unsigned processedTracks=0;
mas01cr@23 1967 for(i=0; i<dbH->numFiles; i++){
mas01cr@23 1968 if(trackTable[i]>sequenceLength-1){
mas01cr@23 1969 w = trackTable[i]-sequenceLength;
mas01cr@23 1970 pn = sMeanL2+i;
mas01cr@23 1971 *pn=0;
mas01cr@23 1972 while(w--)
mas01cr@23 1973 if(*ps>0)
mas01cr@23 1974 *pn+=*ps++;
mas01cr@23 1975 *pn/=trackTable[i]-sequenceLength;
mas01cr@23 1976 SILENCE_THRESH+=*pn;
mas01cr@23 1977 processedTracks++;
mas01cr@23 1978 }
mas01cr@23 1979 ps = sNorm + trackTable[i];
mas01cr@23 1980 }
mas01cr@23 1981 if(verbosity>1)
mas01cr@23 1982 cerr << "processedTracks: " << processedTracks << endl;
mas01cr@23 1983
mas01cr@23 1984
mas01cr@23 1985 SILENCE_THRESH/=processedTracks;
mas01cr@23 1986 USE_THRESH=1; // Turn thresholding on
mas01cr@23 1987 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
mas01cr@23 1988 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
mas01cr@23 1989 if(verbosity>4)
mas01cr@23 1990 cerr << "silence thresh: " << SILENCE_THRESH;
mas01cr@23 1991 w=sequenceLength-1;
mas01cr@23 1992 i=1;
mas01cr@23 1993 tmp1=*qNorm;
mas01cr@23 1994 while(w--)
mas01cr@23 1995 *qNorm+=qNorm[i++];
mas01cr@23 1996 ps = qNorm+1;
mas01cr@23 1997 w=numVectors-sequenceLength; // +1 -1
mas01cr@23 1998 while(w--){
mas01cr@23 1999 tmp2=*ps;
mas01cr@23 2000 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@23 2001 tmp1=tmp2;
mas01cr@23 2002 ps++;
mas01cr@23 2003 }
mas01cr@23 2004 ps = qNorm;
mas01cr@23 2005 qMeanL2 = 0;
mas01cr@23 2006 w=numVectors-sequenceLength+1;
mas01cr@23 2007 while(w--){
mas01cr@23 2008 *ps=sqrt(*ps);
mas01cr@23 2009 qMeanL2+=*ps++;
mas01cr@23 2010 }
mas01cr@23 2011 qMeanL2 /= numVectors-sequenceLength+1;
mas01cr@23 2012
mas01cr@23 2013 if(verbosity>1)
mas01cr@23 2014 cerr << "done." << endl;
mas01cr@23 2015
mas01cr@23 2016
mas01cr@23 2017 if(verbosity>1)
mas01cr@23 2018 cerr << "matching tracks..." << endl;
mas01cr@23 2019
mas01cr@23 2020 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@23 2021 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@23 2022
mas01cr@23 2023 // Make temporary dynamic memory for results
mas01cr@23 2024 double trackDistances[trackNN];
mas01cr@23 2025 unsigned trackIDs[trackNN];
mas01cr@23 2026 unsigned trackQIndexes[trackNN];
mas01cr@23 2027 unsigned trackSIndexes[trackNN];
mas01cr@23 2028
mas01cr@23 2029 double distances[pointNN];
mas01cr@23 2030 unsigned qIndexes[pointNN];
mas01cr@23 2031 unsigned sIndexes[pointNN];
mas01cr@23 2032
mas01cr@23 2033
mas01cr@23 2034 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@23 2035 double thisDist;
mas01cr@23 2036 double oneOverWL=1.0/wL;
mas01cr@23 2037
mas01cr@23 2038 for(k=0; k<pointNN; k++){
mas01cr@23 2039 distances[k]=0.0;
mas01cr@23 2040 qIndexes[k]=~0;
mas01cr@23 2041 sIndexes[k]=~0;
mas01cr@23 2042 }
mas01cr@23 2043
mas01cr@23 2044 for(k=0; k<trackNN; k++){
mas01cr@23 2045 trackDistances[k]=0.0;
mas01cr@23 2046 trackQIndexes[k]=~0;
mas01cr@23 2047 trackSIndexes[k]=~0;
mas01cr@23 2048 trackIDs[k]=~0;
mas01cr@23 2049 }
mas01cr@23 2050
mas01cr@23 2051 // Timestamp and durations processing
mas01cr@23 2052 double meanQdur = 0;
mas01cr@23 2053 double* timesdata = 0;
mas01cr@23 2054 double* meanDBdur = 0;
mas01cr@23 2055
mas01cr@23 2056 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@23 2057 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@23 2058 usingTimes=0;
mas01cr@23 2059 }
mas01cr@23 2060
mas01cr@23 2061 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@23 2062 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@23 2063
mas01cr@23 2064 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@23 2065 timesdata = new double[numVectors];
mas01cr@23 2066 assert(timesdata);
mas01cr@23 2067 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@23 2068 // Calculate durations of points
mas01cr@23 2069 for(k=0; k<numVectors-1; k++){
mas01cr@23 2070 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@23 2071 meanQdur+=timesdata[k];
mas01cr@23 2072 }
mas01cr@23 2073 meanQdur/=k;
mas01cr@23 2074 if(verbosity>1)
mas01cr@23 2075 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@23 2076 meanDBdur = new double[dbH->numFiles];
mas01cr@23 2077 assert(meanDBdur);
mas01cr@23 2078 for(k=0; k<dbH->numFiles; k++){
mas01cr@23 2079 meanDBdur[k]=0.0;
mas01cr@23 2080 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@23 2081 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@23 2082 meanDBdur[k]/=j;
mas01cr@23 2083 }
mas01cr@23 2084 }
mas01cr@23 2085
mas01cr@23 2086 if(usingQueryPoint)
mas01cr@23 2087 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@23 2088 error("queryPoint > numVectors-wL+1 in query");
mas01cr@23 2089 else{
mas01cr@23 2090 if(verbosity>1)
mas01cr@23 2091 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@23 2092 query=query+queryPoint*dbH->dim;
mas01cr@23 2093 qNorm=qNorm+queryPoint;
mas01cr@23 2094 numVectors=wL;
mas01cr@23 2095 }
mas01cr@23 2096
mas01cr@23 2097 double ** D = 0; // Differences query and target
mas01cr@23 2098 double ** DD = 0; // Matched filter distance
mas01cr@23 2099
mas01cr@23 2100 D = new double*[numVectors];
mas01cr@23 2101 assert(D);
mas01cr@23 2102 DD = new double*[numVectors];
mas01cr@23 2103 assert(DD);
mas01cr@23 2104
mas01cr@23 2105 gettimeofday(&tv1, NULL);
mas01cr@23 2106 processedTracks=0;
mas01cr@23 2107 unsigned successfulTracks=0;
mas01cr@23 2108
mas01cr@23 2109 double* qp;
mas01cr@23 2110 double* sp;
mas01cr@23 2111 double* dp;
mas01cr@23 2112 double diffL2;
mas01cr@23 2113
mas01cr@23 2114 // build track offset table
mas01cr@23 2115 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01cr@23 2116 unsigned cumTrack=0;
mas01cr@23 2117 unsigned trackIndexOffset;
mas01cr@23 2118 for(k=0; k<dbH->numFiles;k++){
mas01cr@23 2119 trackOffsetTable[k]=cumTrack;
mas01cr@23 2120 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@23 2121 }
mas01cr@23 2122
mas01cr@23 2123 char nextKey [MAXSTR];
mas01cr@23 2124
mas01cr@23 2125 // chi^2 statistics
mas01cr@23 2126 double sampleCount = 0;
mas01cr@23 2127 double sampleSum = 0;
mas01cr@23 2128 double logSampleSum = 0;
mas01cr@23 2129 double minSample = 1e9;
mas01cr@23 2130 double maxSample = 0;
mas01cr@23 2131
mas01cr@23 2132 // Track loop
mas01cr@23 2133 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@23 2134
mas01cr@23 2135 // get trackID from file if using a control file
mas01cr@23 2136 if(trackFile){
mas01cr@23 2137 if(!trackFile->eof()){
mas01cr@23 2138 trackFile->getline(nextKey,MAXSTR);
mas01cr@23 2139 track=getKeyPos(nextKey);
mas01cr@23 2140 }
mas01cr@23 2141 else
mas01cr@23 2142 break;
mas01cr@23 2143 }
mas01cr@23 2144
mas01cr@23 2145 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01cr@23 2146 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@23 2147
mas01cr@23 2148 if(sequenceLength<trackTable[track]){ // test for short sequences
mas01cr@23 2149
mas01cr@23 2150 if(verbosity>7)
mas01cr@23 2151 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@23 2152
mas01cr@23 2153 // Sum products matrix
mas01cr@23 2154 for(j=0; j<numVectors;j++){
mas01cr@23 2155 D[j]=new double[trackTable[track]];
mas01cr@23 2156 assert(D[j]);
mas01cr@23 2157
mas01cr@23 2158 }
mas01cr@23 2159
mas01cr@23 2160 // Matched filter matrix
mas01cr@23 2161 for(j=0; j<numVectors;j++){
mas01cr@23 2162 DD[j]=new double[trackTable[track]];
mas01cr@23 2163 assert(DD[j]);
mas01cr@23 2164 }
mas01cr@23 2165
mas01cr@23 2166 double tmp;
mas01cr@23 2167 // Dot product
mas01cr@23 2168 for(j=0; j<numVectors; j++)
mas01cr@23 2169 for(k=0; k<trackTable[track]; k++){
mas01cr@23 2170 qp=query+j*dbH->dim;
mas01cr@23 2171 sp=dataBuf+trackOffset+k*dbH->dim;
mas01cr@23 2172 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@23 2173 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@23 2174 *dp=0.0; // initialize correlation cell
mas01cr@23 2175 l=dbH->dim; // size of vectors
mas01cr@23 2176 while(l--)
mas01cr@23 2177 *dp+=*qp++**sp++;
mas01cr@23 2178 }
mas01cr@23 2179
mas01cr@23 2180 // Matched Filter
mas01cr@23 2181 // HOP SIZE == 1
mas01cr@23 2182 double* spd;
mas01cr@23 2183 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@23 2184 for(w=0; w<wL; w++)
mas01cr@23 2185 for(j=0; j<numVectors-w; j++){
mas01cr@23 2186 sp=DD[j];
mas01cr@23 2187 spd=D[j+w]+w;
mas01cr@23 2188 k=trackTable[track]-w;
mas01cr@23 2189 while(k--)
mas01cr@23 2190 *sp+++=*spd++;
mas01cr@23 2191 }
mas01cr@23 2192 }
mas01cr@23 2193
mas01cr@23 2194 else{ // HOP_SIZE != 1
mas01cr@23 2195 for(w=0; w<wL; w++)
mas01cr@23 2196 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@23 2197 sp=DD[j];
mas01cr@23 2198 spd=D[j+w]+w;
mas01cr@23 2199 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@23 2200 *sp+=*spd;
mas01cr@23 2201 sp+=HOP_SIZE;
mas01cr@23 2202 spd+=HOP_SIZE;
mas01cr@23 2203 }
mas01cr@23 2204 }
mas01cr@23 2205 }
mas01cr@23 2206
mas01cr@23 2207 if(verbosity>3 && usingTimes){
mas01cr@23 2208 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01cr@23 2209 cerr.flush();
mas01cr@23 2210 }
mas01cr@23 2211
mas01cr@23 2212 if(!usingTimes ||
mas01cr@23 2213 (usingTimes
mas01cr@23 2214 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@23 2215
mas01cr@23 2216 if(verbosity>3 && usingTimes){
mas01cr@23 2217 cerr << "within duration tolerance." << endl;
mas01cr@23 2218 cerr.flush();
mas01cr@23 2219 }
mas01cr@23 2220
mas01cr@23 2221 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@23 2222 for(j=0;j<numVectors-wL;j+=HOP_SIZE)
mas01cr@23 2223 for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@23 2224 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@23 2225 if(verbosity>10)
mas01cr@23 2226 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@23 2227 // Gather chi^2 statistics
mas01cr@23 2228 if(thisDist<minSample)
mas01cr@23 2229 minSample=thisDist;
mas01cr@23 2230 else if(thisDist>maxSample)
mas01cr@23 2231 maxSample=thisDist;
mas01cr@23 2232 if(thisDist>1e-9){
mas01cr@23 2233 sampleCount++;
mas01cr@23 2234 sampleSum+=thisDist;
mas01cr@23 2235 logSampleSum+=log(thisDist);
mas01cr@23 2236 }
mas01cr@23 2237
mas01cr@23 2238 // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
mas01cr@23 2239 // Power test
mas01cr@23 2240 if(!USE_THRESH ||
mas01cr@23 2241 // Threshold on mean L2 of Q and S sequences
mas01cr@23 2242 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
mas01cr@23 2243 // Are both query and target windows above mean energy?
mas01cr@23 2244 (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH )))
mas01cr@23 2245 thisDist=thisDist; // Computed above
mas01cr@23 2246 else
mas01cr@23 2247 thisDist=1000000.0;
mas01cr@23 2248 if(thisDist>=0 && thisDist<=radius){
mas01cr@23 2249 distances[0]++; // increment count
mas01cr@23 2250 break; // only need one track point per query point
mas01cr@23 2251 }
mas01cr@23 2252 }
mas01cr@23 2253 // How many points were below threshold ?
mas01cr@23 2254 thisDist=distances[0];
mas01cr@23 2255
mas01cr@23 2256 // Let's see the distances then...
mas01cr@23 2257 if(verbosity>3)
mas01cr@23 2258 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@23 2259
mas01cr@23 2260 // All the track stuff goes here
mas01cr@23 2261 n=trackNN;
mas01cr@23 2262 while(n--){
mas01cr@23 2263 if(thisDist>trackDistances[n]){
mas01cr@23 2264 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@23 2265 // Copy all values above up the queue
mas01cr@23 2266 for( l=trackNN-1 ; l > n ; l--){
mas01cr@23 2267 trackDistances[l]=trackDistances[l-1];
mas01cr@23 2268 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@23 2269 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@23 2270 trackIDs[l]=trackIDs[l-1];
mas01cr@23 2271 }
mas01cr@23 2272 trackDistances[n]=thisDist;
mas01cr@23 2273 trackQIndexes[n]=qIndexes[0];
mas01cr@23 2274 trackSIndexes[n]=sIndexes[0];
mas01cr@23 2275 successfulTracks++;
mas01cr@23 2276 trackIDs[n]=track;
mas01cr@23 2277 break;
mas01cr@23 2278 }
mas01cr@23 2279 }
mas01cr@23 2280 else
mas01cr@23 2281 break;
mas01cr@23 2282 }
mas01cr@23 2283 } // Duration match
mas01cr@23 2284
mas01cr@23 2285 // Clean up current track
mas01cr@23 2286 if(D!=NULL){
mas01cr@23 2287 for(j=0; j<numVectors; j++)
mas01cr@23 2288 delete[] D[j];
mas01cr@23 2289 }
mas01cr@23 2290
mas01cr@23 2291 if(DD!=NULL){
mas01cr@23 2292 for(j=0; j<numVectors; j++)
mas01cr@23 2293 delete[] DD[j];
mas01cr@23 2294 }
mas01cr@23 2295 }
mas01cr@23 2296 // per-track reset array values
mas01cr@23 2297 for(unsigned k=0; k<pointNN; k++){
mas01cr@23 2298 distances[k]=0.0;
mas01cr@23 2299 qIndexes[k]=~0;
mas01cr@23 2300 sIndexes[k]=~0;
mas01cr@23 2301 }
mas01cr@23 2302 }
mas01cr@23 2303
mas01cr@23 2304 gettimeofday(&tv2,NULL);
mas01cr@23 2305 if(verbosity>1){
mas01cr@23 2306 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@23 2307 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@23 2308 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@23 2309 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01cr@23 2310 }
mas01cr@23 2311
mas01cr@23 2312 if(adbQueryResult==0){
mas01cr@23 2313 if(verbosity>1)
mas01cr@23 2314 cerr<<endl;
mas01cr@23 2315 // Output answer
mas01cr@23 2316 // Loop over nearest neighbours
mas01cr@23 2317 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01cr@23 2318 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
mas01cr@23 2319 }
mas01cr@23 2320 else{ // Process Web Services Query
mas01cr@23 2321 int listLen = min(trackNN, processedTracks);
mas01cr@23 2322 adbQueryResult->__sizeRlist=listLen;
mas01cr@23 2323 adbQueryResult->__sizeDist=listLen;
mas01cr@23 2324 adbQueryResult->__sizeQpos=listLen;
mas01cr@23 2325 adbQueryResult->__sizeSpos=listLen;
mas01cr@23 2326 adbQueryResult->Rlist= new char*[listLen];
mas01cr@23 2327 adbQueryResult->Dist = new double[listLen];
mas01cr@23 2328 adbQueryResult->Qpos = new int[listLen];
mas01cr@23 2329 adbQueryResult->Spos = new int[listLen];
mas01cr@23 2330 for(k=0; k<adbQueryResult->__sizeRlist; k++){
mas01cr@23 2331 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@23 2332 adbQueryResult->Dist[k]=trackDistances[k];
mas01cr@23 2333 adbQueryResult->Qpos[k]=trackQIndexes[k];
mas01cr@23 2334 adbQueryResult->Spos[k]=trackSIndexes[k];
mas01cr@23 2335 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@23 2336 }
mas01cr@23 2337 }
mas01cr@23 2338
mas01cr@23 2339
mas01cr@23 2340 // Clean up
mas01cr@23 2341 if(trackOffsetTable)
mas01cr@23 2342 delete[] trackOffsetTable;
mas01cr@23 2343 if(queryCopy)
mas01cr@23 2344 delete[] queryCopy;
mas01cr@23 2345 //if(qNorm)
mas01cr@23 2346 //delete qNorm;
mas01cr@23 2347 if(D)
mas01cr@23 2348 delete[] D;
mas01cr@23 2349 if(DD)
mas01cr@23 2350 delete[] DD;
mas01cr@23 2351 if(timesdata)
mas01cr@23 2352 delete[] timesdata;
mas01cr@23 2353 if(meanDBdur)
mas01cr@23 2354 delete[] meanDBdur;
mas01cr@0 2355
mas01cr@0 2356
mas01cr@0 2357 }
mas01cr@0 2358
mas01cr@0 2359 void audioDB::normalize(double* X, int dim, int n){
mas01cr@0 2360 unsigned c = n*dim;
mas01cr@0 2361 double minval,maxval,v,*p;
mas01cr@0 2362
mas01cr@0 2363 p=X;
mas01cr@0 2364 while(c--){
mas01cr@0 2365 v=*p++;
mas01cr@0 2366 if(v<minval)
mas01cr@0 2367 minval=v;
mas01cr@0 2368 else if(v>maxval)
mas01cr@0 2369 maxval=v;
mas01cr@0 2370 }
mas01cr@0 2371
mas01cr@0 2372 normalize(X, dim, n, minval, maxval);
mas01cr@0 2373
mas01cr@0 2374 }
mas01cr@0 2375
mas01cr@0 2376 void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){
mas01cr@0 2377 unsigned c = n*dim;
mas01cr@0 2378 double *p;
mas01cr@0 2379
mas01cr@0 2380
mas01cr@0 2381 if(maxval==minval)
mas01cr@0 2382 return;
mas01cr@0 2383
mas01cr@0 2384 maxval=1.0/(maxval-minval);
mas01cr@0 2385 c=n*dim;
mas01cr@0 2386 p=X;
mas01cr@0 2387
mas01cr@0 2388 while(c--){
mas01cr@0 2389 *p=(*p-minval)*maxval;
mas01cr@0 2390 p++;
mas01cr@0 2391 }
mas01cr@0 2392 }
mas01cr@0 2393
mas01cr@0 2394 // Unit norm block of features
mas01cr@0 2395 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@0 2396 unsigned d;
mas01cr@0 2397 double L2, oneOverL2, *p;
mas01cr@0 2398 if(verbosity>2)
mas01cr@0 2399 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@0 2400 while(n--){
mas01cr@0 2401 p=X;
mas01cr@0 2402 L2=0.0;
mas01cr@0 2403 d=dim;
mas01cr@0 2404 while(d--){
mas01cr@0 2405 L2+=*p**p;
mas01cr@0 2406 p++;
mas01cr@0 2407 }
mas01cr@23 2408 /* L2=sqrt(L2);*/
mas01cr@0 2409 if(qNorm)
mas01cr@0 2410 *qNorm++=L2;
mas01cr@23 2411 /*
mas01cr@0 2412 oneOverL2 = 1.0/L2;
mas01cr@0 2413 d=dim;
mas01cr@0 2414 while(d--){
mas01cr@0 2415 *X*=oneOverL2;
mas01cr@0 2416 X++;
mas01cr@23 2417 */
mas01cr@23 2418 X+=dim;
mas01cr@0 2419 }
mas01cr@0 2420 if(verbosity>2)
mas01cr@0 2421 cerr << "done..." << endl;
mas01cr@0 2422 }
mas01cr@0 2423
mas01cr@0 2424 // Unit norm block of features
mas01cr@0 2425 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 2426 unsigned d;
mas01cr@0 2427 double L2, oneOverL2, *p;
mas01cr@0 2428 unsigned nn = n;
mas01cr@0 2429
mas01cr@0 2430 assert(l2normTable);
mas01cr@0 2431
mas01cr@0 2432 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 2433 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 2434
mas01cr@0 2435 if(verbosity>2)
mas01cr@0 2436 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@0 2437
mas01cr@0 2438 double* l2buf = new double[n];
mas01cr@0 2439 double* l2ptr = l2buf;
mas01cr@0 2440 assert(l2buf);
mas01cr@0 2441 assert(X);
mas01cr@0 2442
mas01cr@0 2443 while(nn--){
mas01cr@0 2444 p=X;
mas01cr@0 2445 *l2ptr=0.0;
mas01cr@0 2446 d=dim;
mas01cr@0 2447 while(d--){
mas01cr@0 2448 *l2ptr+=*p**p;
mas01cr@0 2449 p++;
mas01cr@0 2450 }
mas01cr@23 2451 l2ptr++;
mas01cr@23 2452 /*
mas01cr@23 2453 oneOverL2 = 1.0/(*l2ptr++);
mas01cr@23 2454 d=dim;
mas01cr@23 2455 while(d--){
mas01cr@0 2456 *X*=oneOverL2;
mas01cr@0 2457 X++;
mas01cr@23 2458 }
mas01cr@23 2459 */
mas01cr@23 2460 X+=dim;
mas01cr@0 2461 }
mas01cr@0 2462 unsigned offset;
mas01cr@0 2463 if(append)
mas01cr@0 2464 offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors
mas01cr@0 2465 else
mas01cr@0 2466 offset=0;
mas01cr@0 2467 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 2468 if(l2buf)
mas01cr@23 2469 delete[] l2buf;
mas01cr@0 2470 if(verbosity>2)
mas01cr@0 2471 cerr << "done..." << endl;
mas01cr@0 2472 }
mas01cr@0 2473
mas01cr@0 2474
mas01cr@0 2475 // Start an audioDB server on the host
mas01cr@0 2476 void audioDB::startServer(){
mas01cr@0 2477 struct soap soap;
mas01cr@0 2478 int m, s; // master and slave sockets
mas01cr@0 2479 soap_init(&soap);
mas01cr@0 2480 m = soap_bind(&soap, NULL, port, 100);
mas01cr@0 2481 if (m < 0)
mas01cr@0 2482 soap_print_fault(&soap, stderr);
mas01cr@0 2483 else
mas01cr@0 2484 {
mas01cr@0 2485 fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
mas01cr@0 2486 for (int i = 1; ; i++)
mas01cr@0 2487 {
mas01cr@0 2488 s = soap_accept(&soap);
mas01cr@0 2489 if (s < 0)
mas01cr@0 2490 {
mas01cr@0 2491 soap_print_fault(&soap, stderr);
mas01cr@0 2492 break;
mas01cr@0 2493 }
mas01cr@0 2494 fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i,
mas01cr@0 2495 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
mas01cr@0 2496 if (soap_serve(&soap) != SOAP_OK) // process RPC request
mas01cr@0 2497 soap_print_fault(&soap, stderr); // print error
mas01cr@0 2498 fprintf(stderr, "request served\n");
mas01cr@0 2499 soap_destroy(&soap); // clean up class instances
mas01cr@0 2500 soap_end(&soap); // clean up everything and close socket
mas01cr@0 2501 }
mas01cr@0 2502 }
mas01cr@0 2503 soap_done(&soap); // close master socket and detach environment
mas01cr@0 2504 }
mas01cr@0 2505
mas01cr@0 2506
mas01cr@0 2507 // web services
mas01cr@0 2508
mas01cr@0 2509 // SERVER SIDE
mas01cr@0 2510 int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){
mas01cr@23 2511 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
mas01cr@23 2512 const unsigned argc = 4;
mas01cr@0 2513 audioDB(argc,argv);
mas01cr@0 2514 adbCreateResult=100;
mas01cr@0 2515 return SOAP_OK;
mas01cr@0 2516 }
mas01cr@0 2517
mas01cr@0 2518 // Literal translation of command line to web service
mas01cr@0 2519
mas01cr@23 2520 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){
mas01cr@0 2521 char queryType[256];
mas01cr@0 2522 for(int k=0; k<256; k++)
mas01cr@0 2523 queryType[k]='\0';
mas01cr@0 2524 if(qType == O2_FLAG_POINT_QUERY)
mas01cr@0 2525 strncpy(queryType, "point", strlen("point"));
mas01cr@0 2526 else if (qType == O2_FLAG_SEQUENCE_QUERY)
mas01cr@0 2527 strncpy(queryType, "sequence", strlen("sequence"));
mas01cr@23 2528 else if(qType == O2_FLAG_TRACK_QUERY)
mas01cr@23 2529 strncpy(queryType,"track", strlen("track"));
mas01cr@0 2530 else
mas01cr@0 2531 strncpy(queryType, "", strlen(""));
mas01cr@0 2532
mas01cr@0 2533 if(pointNN==0)
mas01cr@0 2534 pointNN=10;
mas01cr@23 2535 if(trackNN==0)
mas01cr@23 2536 trackNN=10;
mas01cr@0 2537 if(seqLen==0)
mas01cr@0 2538 seqLen=16;
mas01cr@0 2539
mas01cr@0 2540 char qPosStr[256];
mas01cr@0 2541 sprintf(qPosStr, "%d", qPos);
mas01cr@0 2542 char pointNNStr[256];
mas01cr@0 2543 sprintf(pointNNStr,"%d",pointNN);
mas01cr@23 2544 char trackNNStr[256];
mas01cr@23 2545 sprintf(trackNNStr,"%d",trackNN);
mas01cr@0 2546 char seqLenStr[256];
mas01cr@0 2547 sprintf(seqLenStr,"%d",seqLen);
mas01cr@0 2548
mas01cr@0 2549 const char* argv[] ={
mas01cr@0 2550 "./audioDB",
mas01cr@0 2551 COM_QUERY,
mas01cr@0 2552 queryType, // Need to pass a parameter
mas01cr@0 2553 COM_DATABASE,
mas01cr@0 2554 dbName,
mas01cr@0 2555 COM_FEATURES,
mas01cr@0 2556 qKey,
mas01cr@0 2557 COM_KEYLIST,
mas01cr@0 2558 keyList==0?"":keyList,
mas01cr@0 2559 COM_TIMES,
mas01cr@0 2560 timesFileName==0?"":timesFileName,
mas01cr@0 2561 COM_QPOINT,
mas01cr@0 2562 qPosStr,
mas01cr@0 2563 COM_POINTNN,
mas01cr@0 2564 pointNNStr,
mas01cr@23 2565 COM_TRACKNN,
mas01cr@23 2566 trackNNStr, // Need to pass a parameter
mas01cr@0 2567 COM_SEQLEN,
mas01cr@0 2568 seqLenStr
mas01cr@0 2569 };
mas01cr@0 2570
mas01cr@0 2571 const unsigned argc = 19;
mas01cr@0 2572 audioDB(argc, (char* const*)argv, &adbQueryResult);
mas01cr@0 2573 return SOAP_OK;
mas01cr@0 2574 }
mas01cr@0 2575
mas01cr@0 2576 int main(const unsigned argc, char* const argv[]){
mas01cr@0 2577 audioDB(argc, argv);
mas01cr@0 2578 }