annotate audioDB.cpp @ 185:ae212368a874 no-big-mmap

Typo fix in cleanup() I'm not sure why the tests didn't find this typo, but maybe the munmap() is being slightly permissive about not segfaulting horribly when given something it has already unmapped...
author mas01cr
date Fri, 16 Nov 2007 11:44:55 +0000
parents 1fb3d82aa590
children 530154ca4cf5
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01cr@105 3 #if defined(O2_DEBUG)
mas01cr@104 4 void sigterm_action(int signal, siginfo_t *info, void *context) {
mas01cr@104 5 exit(128+signal);
mas01cr@104 6 }
mas01cr@104 7
mas01cr@104 8 void sighup_action(int signal, siginfo_t *info, void *context) {
mas01cr@104 9 // FIXME: reread any configuration files
mas01cr@104 10 }
mas01cr@105 11 #endif
mas01cr@0 12
mas01cr@32 13 void audioDB::error(const char* a, const char* b, const char *sysFunc) {
mas01cr@77 14 if(isServer) {
mas01cr@81 15 /* FIXME: I think this is leaky -- we never delete err. actually
mas01cr@81 16 deleting it is tricky, though; it gets placed into some
mas01cr@81 17 soap-internal struct with uncertain extent... -- CSR,
mas01cr@81 18 2007-10-01 */
mas01cr@77 19 char *err = new char[256]; /* FIXME: overflows */
mas01cr@77 20 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
mas01cr@77 21 /* FIXME: actually we could usefully do with a properly structured
mas01cr@77 22 type, so that we can throw separate faultstring and details.
mas01cr@77 23 -- CSR, 2007-10-01 */
mas01cr@77 24 throw(err);
mas01cr@77 25 } else {
mas01cr@77 26 cerr << a << ": " << b << endl;
mas01cr@77 27 if (sysFunc) {
mas01cr@77 28 perror(sysFunc);
mas01cr@77 29 }
mas01cr@77 30 exit(1);
mas01cr@32 31 }
mas01cr@0 32 }
mas01cr@0 33
mas01cr@76 34 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 35 {
mas01cr@0 36 if(processArgs(argc, argv)<0){
mas01cr@0 37 printf("No command found.\n");
mas01cr@0 38 cmdline_parser_print_version ();
mas01cr@0 39 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 40 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 41 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 42 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 43 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 44 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 45 error("No command found");
mas01cr@0 46 }
mas01cr@77 47
mas01cr@0 48 if(O2_ACTION(COM_SERVER))
mas01cr@0 49 startServer();
mas01cr@0 50
mas01cr@0 51 else if(O2_ACTION(COM_CREATE))
mas01cr@0 52 create(dbName);
mas01cr@0 53
mas01cr@0 54 else if(O2_ACTION(COM_INSERT))
mas01cr@0 55 insert(dbName, inFile);
mas01cr@0 56
mas01cr@0 57 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 58 batchinsert(dbName, inFile);
mas01cr@0 59
mas01cr@0 60 else if(O2_ACTION(COM_QUERY))
mas01cr@0 61 if(isClient)
mas01cr@0 62 ws_query(dbName, inFile, (char*)hostport);
mas01cr@0 63 else
mas01cr@76 64 query(dbName, inFile);
mas01cr@0 65
mas01cr@0 66 else if(O2_ACTION(COM_STATUS))
mas01cr@0 67 if(isClient)
mas01cr@0 68 ws_status(dbName,(char*)hostport);
mas01cr@0 69 else
mas01cr@0 70 status(dbName);
mas01cr@0 71
mas01cr@0 72 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 73 l2norm(dbName);
mas01cr@0 74
mas01cr@0 75 else if(O2_ACTION(COM_DUMP))
mas01cr@0 76 dump(dbName);
mas01cr@0 77
mas01cr@0 78 else
mas01cr@0 79 error("Unrecognized command",command);
mas01cr@0 80 }
mas01cr@0 81
mas01cr@133 82 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 83 {
mas01cr@97 84 try {
mas01cr@151 85 isServer = 1; // FIXME: Hack
mas01cr@97 86 processArgs(argc, argv);
mas01cr@97 87 assert(O2_ACTION(COM_QUERY));
mas01cr@133 88 query(dbName, inFile, adbQueryResponse);
mas01cr@97 89 } catch(char *err) {
mas01cr@97 90 cleanup();
mas01cr@97 91 throw(err);
mas01cr@97 92 }
mas01cr@76 93 }
mas01cr@76 94
mas01cr@133 95 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 96 {
mas01cr@97 97 try {
mas01cr@151 98 isServer = 1; // FIXME: Hack
mas01cr@97 99 processArgs(argc, argv);
mas01cr@97 100 assert(O2_ACTION(COM_STATUS));
mas01cr@133 101 status(dbName, adbStatusResponse);
mas01cr@97 102 } catch(char *err) {
mas01cr@97 103 cleanup();
mas01cr@97 104 throw(err);
mas01cr@97 105 }
mas01cr@76 106 }
mas01cr@76 107
mas01cr@97 108 void audioDB::cleanup() {
mas01cr@122 109 cmdline_parser_free(&args_info);
mas01cr@0 110 if(indata)
mas01cr@0 111 munmap(indata,statbuf.st_size);
mas01cr@0 112 if(db)
mas01cr@176 113 munmap(db,getpagesize());
mas01cr@175 114 if(fileTable)
mas01cr@178 115 munmap(fileTable, fileTableLength);
mas01cr@175 116 if(trackTable)
mas01cr@178 117 munmap(trackTable, trackTableLength);
mas01cr@175 118 if(dataBuf)
mas01cr@178 119 munmap(dataBuf, dataBufLength);
mas01cr@175 120 if(timesTable)
mas01cr@185 121 munmap(timesTable, timesTableLength);
mas01cr@175 122 if(l2normTable)
mas01cr@178 123 munmap(l2normTable, l2normTableLength);
mas01cr@175 124
mas01cr@0 125 if(dbfid>0)
mas01cr@0 126 close(dbfid);
mas01cr@0 127 if(infid>0)
mas01cr@0 128 close(infid);
mas01cr@0 129 if(dbH)
mas01cr@0 130 delete dbH;
mas01cr@0 131 }
mas01cr@0 132
mas01cr@97 133 audioDB::~audioDB(){
mas01cr@97 134 cleanup();
mas01cr@97 135 }
mas01cr@97 136
mas01cr@0 137 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 138
mas01cr@0 139 if(argc<2){
mas01cr@0 140 cmdline_parser_print_version ();
mas01cr@0 141 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 142 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 143 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 144 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 145 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 146 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 147 exit(0);
mas01cr@0 148 }
mas01cr@0 149
mas01cr@0 150 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@151 151 error("Error parsing command line");
mas01cr@0 152
mas01cr@0 153 if(args_info.help_given){
mas01cr@0 154 cmdline_parser_print_help();
mas01cr@0 155 exit(0);
mas01cr@0 156 }
mas01cr@0 157
mas01cr@0 158 if(args_info.verbosity_given){
mas01cr@0 159 verbosity=args_info.verbosity_arg;
mas01cr@0 160 if(verbosity<0 || verbosity>10){
mas01cr@0 161 cerr << "Warning: verbosity out of range, setting to 1" << endl;
mas01cr@0 162 verbosity=1;
mas01cr@0 163 }
mas01cr@0 164 }
mas01cr@0 165
mas01cr@129 166 if(args_info.size_given) {
mas01cr@140 167 if (args_info.size_arg < 50 || args_info.size_arg > 4000) {
mas01cr@129 168 error("Size out of range", "");
mas01cr@129 169 }
mas01cr@129 170 size = args_info.size_arg * 1000000;
mas01cr@129 171 }
mas01cr@129 172
mas01mc@17 173 if(args_info.radius_given){
mas01mc@17 174 radius=args_info.radius_arg;
mas01mc@17 175 if(radius<=0 || radius>1000000000){
mas01cr@77 176 error("radius out of range");
mas01mc@17 177 }
mas01mc@17 178 else
mas01cr@60 179 if(verbosity>3) {
mas01mc@17 180 cerr << "Setting radius to " << radius << endl;
mas01cr@60 181 }
mas01mc@17 182 }
mas01mc@17 183
mas01cr@0 184 if(args_info.SERVER_given){
mas01cr@0 185 command=COM_SERVER;
mas01cr@0 186 port=args_info.SERVER_arg;
mas01cr@0 187 if(port<100 || port > 100000)
mas01cr@0 188 error("port out of range");
mas01cr@151 189 isServer = 1;
mas01cr@105 190 #if defined(O2_DEBUG)
mas01cr@104 191 struct sigaction sa;
mas01cr@104 192 sa.sa_sigaction = sigterm_action;
mas01cr@104 193 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 194 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 195 sa.sa_sigaction = sighup_action;
mas01cr@104 196 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 197 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 198 #endif
mas01cr@0 199 return 0;
mas01cr@0 200 }
mas01cr@0 201
mas01cr@0 202 // No return on client command, find database command
mas01cr@105 203 if(args_info.client_given){
mas01cr@105 204 command=COM_CLIENT;
mas01cr@105 205 hostport=args_info.client_arg;
mas01cr@105 206 isClient=1;
mas01cr@105 207 }
mas01cr@0 208
mas01cr@105 209 if(args_info.NEW_given){
mas01cr@105 210 command=COM_CREATE;
mas01cr@105 211 dbName=args_info.database_arg;
mas01cr@105 212 return 0;
mas01cr@105 213 }
mas01cr@0 214
mas01cr@105 215 if(args_info.STATUS_given){
mas01cr@105 216 command=COM_STATUS;
mas01cr@105 217 dbName=args_info.database_arg;
mas01cr@105 218 return 0;
mas01cr@105 219 }
mas01cr@0 220
mas01cr@105 221 if(args_info.DUMP_given){
mas01cr@105 222 command=COM_DUMP;
mas01cr@105 223 dbName=args_info.database_arg;
mas01cr@131 224 output = args_info.output_arg;
mas01cr@105 225 return 0;
mas01cr@105 226 }
mas01cr@0 227
mas01cr@105 228 if(args_info.L2NORM_given){
mas01cr@105 229 command=COM_L2NORM;
mas01cr@105 230 dbName=args_info.database_arg;
mas01cr@105 231 return 0;
mas01cr@105 232 }
mas01cr@0 233
mas01cr@105 234 if(args_info.INSERT_given){
mas01cr@105 235 command=COM_INSERT;
mas01cr@105 236 dbName=args_info.database_arg;
mas01cr@105 237 inFile=args_info.features_arg;
mas01cr@105 238 if(args_info.key_given)
mas01cr@105 239 key=args_info.key_arg;
mas01cr@105 240 if(args_info.times_given){
mas01cr@105 241 timesFileName=args_info.times_arg;
mas01cr@105 242 if(strlen(timesFileName)>0){
mas01cr@105 243 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 244 error("Could not open times file for reading", timesFileName);
mas01cr@105 245 usingTimes=1;
mas01cr@105 246 }
mas01cr@105 247 }
mas01cr@105 248 return 0;
mas01cr@105 249 }
mas01cr@105 250
mas01cr@105 251 if(args_info.BATCHINSERT_given){
mas01cr@105 252 command=COM_BATCHINSERT;
mas01cr@105 253 dbName=args_info.database_arg;
mas01cr@105 254 inFile=args_info.featureList_arg;
mas01cr@105 255 if(args_info.keyList_given)
mas01cr@105 256 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 257
mas01cr@105 258 /* TO DO: REPLACE WITH
mas01cr@0 259 if(args_info.keyList_given){
mas01mc@18 260 trackFileName=args_info.keyList_arg;
mas01mc@18 261 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01mc@18 262 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 263 }
mas01cr@0 264 AND UPDATE BATCHINSERT()
mas01cr@105 265 */
mas01cr@105 266
mas01cr@105 267 if(args_info.timesList_given){
mas01cr@105 268 timesFileName=args_info.timesList_arg;
mas01cr@105 269 if(strlen(timesFileName)>0){
mas01cr@105 270 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 271 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 272 usingTimes=1;
mas01cr@105 273 }
mas01cr@105 274 }
mas01cr@105 275 return 0;
mas01cr@105 276 }
mas01cr@105 277
mas01cr@105 278 // Query command and arguments
mas01cr@105 279 if(args_info.QUERY_given){
mas01cr@105 280 command=COM_QUERY;
mas01cr@105 281 dbName=args_info.database_arg;
mas01cr@105 282 inFile=args_info.features_arg;
mas01cr@105 283
mas01cr@105 284 if(args_info.keyList_given){
mas01cr@105 285 trackFileName=args_info.keyList_arg;
mas01cr@105 286 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01cr@105 287 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 288 }
mas01cr@105 289
mas01cr@105 290 if(args_info.times_given){
mas01cr@105 291 timesFileName=args_info.times_arg;
mas01cr@105 292 if(strlen(timesFileName)>0){
mas01cr@105 293 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 294 error("Could not open times file for reading", timesFileName);
mas01cr@105 295 usingTimes=1;
mas01cr@105 296 }
mas01cr@105 297 }
mas01cr@105 298
mas01cr@105 299 // query type
mas01cr@105 300 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 301 queryType=O2_TRACK_QUERY;
mas01cr@105 302 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 303 queryType=O2_POINT_QUERY;
mas01cr@105 304 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 305 queryType=O2_SEQUENCE_QUERY;
mas01cr@105 306 else
mas01cr@105 307 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 308
mas01cr@105 309 if(!args_info.exhaustive_flag){
mas01cr@105 310 queryPoint = args_info.qpoint_arg;
mas01cr@105 311 usingQueryPoint=1;
mas01cr@105 312 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 313 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 314 }
mas01cr@105 315
mas01cr@105 316 pointNN = args_info.pointnn_arg;
mas01cr@105 317 if(pointNN < 1 || pointNN > 1000) {
mas01cr@105 318 error("pointNN out of range: 1 <= pointNN <= 1000");
mas01cr@105 319 }
mas01cr@105 320 trackNN = args_info.resultlength_arg;
mas01cr@105 321 if(trackNN < 1 || trackNN > 1000) {
mas01cr@105 322 error("resultlength out of range: 1 <= resultlength <= 1000");
mas01cr@105 323 }
mas01cr@105 324 sequenceLength = args_info.sequencelength_arg;
mas01cr@105 325 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@105 326 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@105 327 }
mas01cr@105 328 sequenceHop = args_info.sequencehop_arg;
mas01cr@105 329 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01cr@105 330 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01cr@105 331 }
mas01cr@105 332 return 0;
mas01cr@105 333 }
mas01cr@105 334 return -1; // no command found
mas01cr@0 335 }
mas01cr@0 336
mas01cr@30 337 void audioDB::get_lock(int fd, bool exclusive) {
mas01cr@30 338 struct flock lock;
mas01cr@30 339 int status;
mas01cr@30 340
mas01cr@30 341 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01cr@30 342 lock.l_whence = SEEK_SET;
mas01cr@30 343 lock.l_start = 0;
mas01cr@30 344 lock.l_len = 0; /* "the whole file" */
mas01cr@30 345
mas01cr@30 346 retry:
mas01cr@30 347 do {
mas01cr@30 348 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@30 349 } while (status != 0 && errno == EINTR);
mas01cr@30 350
mas01cr@30 351 if (status) {
mas01cr@30 352 if (errno == EAGAIN) {
mas01cr@30 353 sleep(1);
mas01cr@30 354 goto retry;
mas01cr@30 355 } else {
mas01cr@32 356 error("fcntl lock error", "", "fcntl");
mas01cr@30 357 }
mas01cr@30 358 }
mas01cr@30 359 }
mas01cr@30 360
mas01cr@30 361 void audioDB::release_lock(int fd) {
mas01cr@30 362 struct flock lock;
mas01cr@30 363 int status;
mas01cr@30 364
mas01cr@30 365 lock.l_type = F_UNLCK;
mas01cr@30 366 lock.l_whence = SEEK_SET;
mas01cr@30 367 lock.l_start = 0;
mas01cr@30 368 lock.l_len = 0;
mas01cr@30 369
mas01cr@30 370 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@30 371
mas01cr@30 372 if (status)
mas01cr@32 373 error("fcntl unlock error", "", "fcntl");
mas01cr@30 374 }
mas01cr@30 375
mas01cr@120 376 /* Make a new database.
mas01cr@120 377
mas01cr@120 378 The database consists of:
mas01cr@120 379
mas01cr@120 380 * a header (see dbTableHeader struct definition);
mas01cr@120 381 * keyTable: list of keys of tracks;
mas01cr@120 382 * trackTable: Maps implicit feature index to a feature vector
mas01cr@120 383 matrix (sizes of tracks)
mas01cr@120 384 * featureTable: Lots of doubles;
mas01cr@120 385 * timesTable: time points for each feature vector;
mas01cr@120 386 * l2normTable: squared l2norms for each feature vector.
mas01cr@120 387 */
mas01cr@0 388 void audioDB::create(const char* dbName){
mas01cr@31 389 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01cr@32 390 error("Can't create database file", dbName, "open");
mas01cr@30 391 get_lock(dbfid, 1);
mas01cr@0 392
mas01cr@60 393 if(verbosity) {
mas01cr@0 394 cerr << "header size:" << O2_HEADERSIZE << endl;
mas01cr@60 395 }
mas01cr@0 396
mas01cr@0 397 dbH = new dbTableHeaderT();
mas01cr@0 398 assert(dbH);
mas01cr@0 399
mas01cr@140 400 unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE);
mas01cr@140 401
mas01cr@0 402 // Initialize header
mas01cr@108 403 dbH->magic = O2_MAGIC;
mas01cr@108 404 dbH->version = O2_FORMAT_VERSION;
mas01cr@108 405 dbH->numFiles = 0;
mas01cr@108 406 dbH->dim = 0;
mas01cr@108 407 dbH->flags = 0;
mas01cr@108 408 dbH->length = 0;
mas01cr@177 409 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
mas01cr@177 410 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles);
mas01cr@177 411 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles);
mas01cr@177 412 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
mas01cr@177 413 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
mas01cr@129 414 dbH->dbSize = size;
mas01cr@0 415
mas01cr@173 416 write(dbfid, dbH, O2_HEADERSIZE);
mas01cr@173 417
mas01cr@173 418 // go to the location corresponding to the last byte
mas01cr@173 419 if (lseek (dbfid, size - 1, SEEK_SET) == -1)
mas01cr@173 420 error("lseek error in db file", "", "lseek");
mas01cr@173 421
mas01cr@173 422 // write a dummy byte at the last location
mas01cr@173 423 if (write (dbfid, "", 1) != 1)
mas01cr@173 424 error("write error", "", "write");
mas01cr@173 425
mas01cr@60 426 if(verbosity) {
mas01cr@0 427 cerr << COM_CREATE << " " << dbName << endl;
mas01cr@60 428 }
mas01cr@0 429 }
mas01cr@0 430
mas01cr@0 431 void audioDB::drop(){
mas01cr@108 432 // FIXME: drop something? Should we even allow this?
mas01cr@0 433 }
mas01cr@0 434
mas01cr@177 435 void audioDB::initDBHeader(const char* dbName) {
mas01cr@106 436 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
mas01cr@32 437 error("Can't open database file", dbName, "open");
mas01cr@106 438 }
mas01cr@120 439
mas01cr@30 440 get_lock(dbfid, forWrite);
mas01cr@0 441 // Get the database header info
mas01cr@0 442 dbH = new dbTableHeaderT();
mas01cr@0 443 assert(dbH);
mas01cr@0 444
mas01cr@106 445 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@106 446 error("error reading db header", dbName, "read");
mas01cr@106 447 }
mas01cr@0 448
mas01cr@108 449 if(dbH->magic == O2_OLD_MAGIC) {
mas01cr@108 450 // FIXME: if anyone ever complains, write the program to convert
mas01cr@108 451 // from the old audioDB format to the new...
mas01cr@108 452 error("database file has old O2 header", dbName);
mas01cr@108 453 }
mas01cr@0 454
mas01cr@106 455 if(dbH->magic != O2_MAGIC) {
mas01cr@106 456 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl;
mas01cr@108 457 error("database file has incorrect header", dbName);
mas01cr@108 458 }
mas01cr@108 459
mas01cr@108 460 if(dbH->version != O2_FORMAT_VERSION) {
mas01cr@108 461 error("database file has incorect version", dbName);
mas01cr@0 462 }
mas01cr@0 463
mas01cr@128 464 // FIXME: when changing file format version, remove this workaround.
mas01cr@128 465 if(dbH->dbSize == 0) {
mas01cr@128 466 dbH->dbSize = O2_DEFAULTDBSIZE;
mas01cr@128 467 }
mas01cr@128 468
mas01cr@120 469 // Make some handy tables with correct types
mas01cr@178 470 #define CHECKED_MMAP(type, var, start, length) \
mas01cr@178 471 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
mas01cr@174 472 if(tmp == (void *) -1) { \
mas01cr@174 473 error("mmap error for db table", #var, "mmap"); \
mas01cr@174 474 } \
mas01cr@174 475 var = (type) tmp; \
mas01cr@174 476 }
mas01cr@174 477
mas01cr@176 478 CHECKED_MMAP(char *, db, 0, getpagesize());
mas01cr@176 479
mas01cr@178 480 if(forWrite || (dbH->length > 0)) {
mas01cr@178 481 if(forWrite) {
mas01cr@178 482 fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset;
mas01cr@178 483 trackTableLength = dbH->dataOffset - dbH->trackTableOffset;
mas01cr@178 484 dataBufLength = dbH->timesTableOffset - dbH->dataOffset;
mas01cr@178 485 timesTableLength = dbH->l2normTableOffset - dbH->timesTableOffset;
mas01cr@178 486 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
mas01cr@178 487 } else {
mas01cr@178 488 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE);
mas01cr@178 489 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE);
mas01cr@178 490 dataBufLength = ALIGN_PAGE_UP(dbH->length);
mas01cr@178 491 timesTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
mas01cr@178 492 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
mas01cr@177 493 }
mas01cr@178 494 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength);
mas01cr@178 495 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength);
mas01cr@178 496 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@178 497 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength);
mas01cr@178 498 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength);
mas01cr@177 499 }
mas01cr@120 500 }
mas01cr@120 501
mas01cr@169 502 void audioDB::initInputFile (const char *inFile) {
mas01cr@169 503 if (inFile) {
mas01cr@169 504 if ((infid = open(inFile, O_RDONLY)) < 0) {
mas01cr@169 505 error("can't open input file for reading", inFile, "open");
mas01cr@169 506 }
mas01cr@169 507
mas01cr@169 508 if (fstat(infid, &statbuf) < 0) {
mas01cr@169 509 error("fstat error finding size of input", inFile, "fstat");
mas01cr@169 510 }
mas01cr@169 511
mas01cr@169 512 if(dbH->dim == 0 && dbH->length == 0) { // empty database
mas01cr@169 513 // initialize with input dimensionality
mas01cr@169 514 if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) {
mas01cr@169 515 error("short read of input file", inFile);
mas01cr@169 516 }
mas01cr@169 517 if(dbH->dim == 0) {
mas01cr@169 518 error("dimensionality of zero in input file", inFile);
mas01cr@169 519 }
mas01cr@169 520 } else {
mas01cr@169 521 unsigned test;
mas01cr@169 522 if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) {
mas01cr@169 523 error("short read of input file", inFile);
mas01cr@169 524 }
mas01cr@169 525 if(dbH->dim == 0) {
mas01cr@169 526 error("dimensionality of zero in input file", inFile);
mas01cr@169 527 }
mas01cr@169 528 if(dbH->dim != test) {
mas01cr@169 529 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl;
mas01cr@169 530 error("feature dimensions do not match database table dimensions", inFile);
mas01cr@169 531 }
mas01cr@169 532 }
mas01cr@169 533
mas01cr@169 534 if ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) {
mas01cr@169 535 error("mmap error for input", inFile, "mmap");
mas01cr@169 536 }
mas01cr@169 537 }
mas01cr@169 538 }
mas01cr@169 539
mas01cr@177 540 void audioDB::initTables(const char* dbName, const char* inFile = 0) {
mas01cr@177 541 initDBHeader(dbName);
mas01cr@169 542 initInputFile(inFile);
mas01cr@0 543 }
mas01cr@0 544
mas01cr@180 545 bool audioDB::enough_data_space_free(off_t size) {
mas01cr@180 546 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
mas01cr@180 547 }
mas01cr@180 548
mas01cr@181 549 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
mas01cr@181 550 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
mas01cr@181 551 write(dbfid, buffer, size);
mas01cr@181 552 }
mas01cr@181 553
mas01cr@177 554 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@177 555 forWrite = true;
mas01cr@177 556 initTables(dbName, inFile);
mas01cr@0 557
mas01cr@0 558 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 559 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 560
mas01cr@180 561 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@169 562 error("Insert failed: no more room in database", inFile);
mas01cr@180 563 }
mas01cr@180 564
mas01cr@0 565 if(!key)
mas01cr@0 566 key=inFile;
mas01cr@0 567 // Linear scan of filenames check for pre-existing feature
mas01cr@0 568 unsigned alreadyInserted=0;
mas01cr@0 569 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 570 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
mas01cr@0 571 alreadyInserted=1;
mas01cr@0 572 break;
mas01cr@0 573 }
mas01cr@0 574
mas01cr@0 575 if(alreadyInserted){
mas01cr@60 576 if(verbosity) {
mas01cr@0 577 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
mas01cr@60 578 }
mas01cr@0 579 return;
mas01cr@0 580 }
mas01cr@0 581
mas01mc@18 582 // Make a track index table of features to file indexes
mas01cr@0 583 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 584 if(!numVectors){
mas01cr@60 585 if(verbosity) {
mas01cr@0 586 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
mas01cr@60 587 }
mas01cr@0 588 // CLEAN UP
mas01cr@0 589 munmap(indata,statbuf.st_size);
mas01cr@128 590 munmap(db,dbH->dbSize);
mas01cr@0 591 close(infid);
mas01cr@0 592 return;
mas01cr@0 593 }
mas01cr@0 594
mas01cr@0 595 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@0 596
mas01cr@0 597 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 598
mas01cr@0 599 // Check times status and insert times from file
mas01cr@0 600 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 601 double* timesdata=timesTable+timesoffset;
mas01cr@0 602 assert(timesdata+numVectors<l2normTable);
mas01cr@0 603 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 604
mas01cr@0 605 // Increment file count
mas01cr@0 606 dbH->numFiles++;
mas01cr@0 607
mas01cr@0 608 // Update Header information
mas01cr@0 609 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 610
mas01mc@18 611 // Update track to file index map
mas01mc@18 612 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 613
mas01cr@181 614 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@0 615
mas01cr@0 616 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 617 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@176 618 unitNormAndInsertL2((double*)(((char *) dataBuf) + insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 619
mas01cr@0 620 // Report status
mas01cr@0 621 status(dbName);
mas01cr@60 622 if(verbosity) {
mas01cr@0 623 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
mas01cr@0 624 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
mas01cr@60 625 }
mas01cr@0 626
mas01cr@176 627 // Copy the header back to the database
mas01cr@176 628 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@176 629
mas01cr@0 630 // CLEAN UP
mas01cr@0 631 munmap(indata,statbuf.st_size);
mas01cr@0 632 close(infid);
mas01cr@0 633 }
mas01cr@0 634
mas01cr@0 635 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
mas01cr@0 636 unsigned numtimes=0;
mas01cr@0 637 if(usingTimes){
mas01cr@0 638 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
mas01cr@0 639 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@0 640 else if(!(dbH->flags&O2_FLAG_TIMES)){
mas01cr@0 641 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
mas01cr@0 642 usingTimes=0;
mas01cr@0 643 }
mas01cr@0 644
mas01cr@0 645 if(!timesFile->is_open()){
mas01cr@0 646 if(dbH->flags & O2_FLAG_TIMES){
mas01cr@0 647 munmap(indata,statbuf.st_size);
mas01cr@128 648 munmap(db,dbH->dbSize);
mas01cr@0 649 error("problem opening times file on timestamped database",timesFileName);
mas01cr@0 650 }
mas01cr@0 651 else{
mas01cr@0 652 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
mas01cr@0 653 usingTimes=0;
mas01cr@0 654 }
mas01cr@0 655 }
mas01cr@0 656
mas01cr@0 657 // Process time file
mas01cr@0 658 if(usingTimes){
mas01cr@0 659 do{
mas01cr@0 660 *timesFile>>*timesdata++;
mas01cr@0 661 if(timesFile->eof())
mas01cr@0 662 break;
mas01cr@0 663 numtimes++;
mas01cr@0 664 }while(!timesFile->eof() && numtimes<numVectors);
mas01cr@0 665 if(!timesFile->eof()){
mas01cr@0 666 double dummy;
mas01cr@0 667 do{
mas01cr@0 668 *timesFile>>dummy;
mas01cr@0 669 if(timesFile->eof())
mas01cr@0 670 break;
mas01cr@0 671 numtimes++;
mas01cr@0 672 }while(!timesFile->eof());
mas01cr@0 673 }
mas01cr@0 674 if(numtimes<numVectors || numtimes>numVectors+2){
mas01cr@0 675 munmap(indata,statbuf.st_size);
mas01cr@128 676 munmap(db,dbH->dbSize);
mas01cr@0 677 close(infid);
mas01cr@0 678 cerr << "expected " << numVectors << " found " << numtimes << endl;
mas01cr@0 679 error("Times file is incorrect length for features file",inFile);
mas01cr@0 680 }
mas01cr@60 681 if(verbosity>2) {
mas01cr@0 682 cerr << "numtimes: " << numtimes << endl;
mas01cr@60 683 }
mas01cr@0 684 }
mas01cr@0 685 }
mas01cr@0 686 }
mas01cr@0 687
mas01cr@120 688 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@0 689
mas01cr@177 690 forWrite = true;
mas01cr@177 691 initDBHeader(dbName);
mas01cr@0 692
mas01cr@0 693 if(!key)
mas01cr@0 694 key=inFile;
mas01cr@0 695 ifstream *filesIn = 0;
mas01cr@0 696 ifstream *keysIn = 0;
mas01cr@0 697 ifstream* thisTimesFile = 0;
mas01cr@0 698
mas01cr@0 699 if(!(filesIn = new ifstream(inFile)))
mas01cr@0 700 error("Could not open batch in file", inFile);
mas01cr@0 701 if(key && key!=inFile)
mas01cr@0 702 if(!(keysIn = new ifstream(key)))
mas01cr@0 703 error("Could not open batch key file",key);
mas01cr@0 704
mas01cr@0 705 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 706 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 707
mas01cr@0 708 unsigned totalVectors=0;
mas01cr@0 709 char *thisKey = new char[MAXSTR];
mas01cr@0 710 char *thisFile = new char[MAXSTR];
mas01cr@0 711 char *thisTimesFileName = new char[MAXSTR];
mas01cr@120 712
mas01cr@0 713 do{
mas01cr@0 714 filesIn->getline(thisFile,MAXSTR);
mas01cr@0 715 if(key && key!=inFile)
mas01cr@0 716 keysIn->getline(thisKey,MAXSTR);
mas01cr@0 717 else
mas01cr@0 718 thisKey = thisFile;
mas01cr@0 719 if(usingTimes)
mas01cr@0 720 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@0 721
mas01cr@0 722 if(filesIn->eof())
mas01cr@0 723 break;
mas01cr@0 724
mas01cr@169 725 initInputFile(thisFile);
mas01cr@0 726
mas01cr@180 727 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
mas01cr@169 728 error("batchinsert failed: no more room in database", thisFile);
mas01cr@180 729 }
mas01cr@0 730
mas01cr@0 731 // Linear scan of filenames check for pre-existing feature
mas01cr@0 732 unsigned alreadyInserted=0;
mas01cr@0 733
mas01cr@0 734 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 735 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
mas01cr@0 736 alreadyInserted=1;
mas01cr@0 737 break;
mas01cr@0 738 }
mas01cr@0 739
mas01cr@0 740 if(alreadyInserted){
mas01cr@60 741 if(verbosity) {
mas01cr@0 742 cerr << "Warning: key already exists in database:" << thisKey << endl;
mas01cr@60 743 }
mas01cr@0 744 }
mas01cr@0 745 else{
mas01cr@0 746
mas01mc@18 747 // Make a track index table of features to file indexes
mas01cr@0 748 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 749 if(!numVectors){
mas01cr@60 750 if(verbosity) {
mas01cr@0 751 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
mas01cr@60 752 }
mas01cr@0 753 }
mas01cr@0 754 else{
mas01cr@0 755 if(usingTimes){
mas01cr@0 756 if(timesFile->eof())
mas01cr@0 757 error("not enough timestamp files in timesList");
mas01cr@0 758 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
mas01cr@0 759 if(!thisTimesFile->is_open())
mas01cr@0 760 error("Cannot open timestamp file",thisTimesFileName);
mas01cr@0 761 unsigned insertoffset=dbH->length;
mas01cr@0 762 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 763 double* timesdata=timesTable+timesoffset;
mas01cr@0 764 assert(timesdata+numVectors<l2normTable);
mas01cr@0 765 insertTimeStamps(numVectors,thisTimesFile,timesdata);
mas01cr@0 766 if(thisTimesFile)
mas01cr@0 767 delete thisTimesFile;
mas01cr@0 768 }
mas01cr@0 769
mas01cr@0 770 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@0 771
mas01cr@0 772 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 773
mas01cr@0 774 // Increment file count
mas01cr@0 775 dbH->numFiles++;
mas01cr@0 776
mas01cr@0 777 // Update Header information
mas01cr@0 778 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 779
mas01mc@18 780 // Update track to file index map
mas01mc@18 781 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 782
mas01cr@181 783 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
mas01cr@181 784
mas01cr@0 785 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 786 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@176 787 unitNormAndInsertL2((double*)(((char *) dataBuf) + insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 788
mas01cr@0 789 totalVectors+=numVectors;
mas01cr@176 790
mas01cr@176 791 // Copy the header back to the database
mas01cr@176 792 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 793 }
mas01cr@0 794 }
mas01cr@0 795 // CLEAN UP
mas01cr@0 796 munmap(indata,statbuf.st_size);
mas01cr@0 797 close(infid);
mas01cr@0 798 }while(!filesIn->eof());
mas01mc@12 799
mas01cr@60 800 if(verbosity) {
mas01cr@0 801 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
mas01cr@0 802 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
mas01cr@60 803 }
mas01cr@0 804
mas01cr@0 805 // Report status
mas01cr@0 806 status(dbName);
mas01cr@0 807 }
mas01cr@0 808
mas01cr@76 809 // FIXME: this can't propagate the sequence length argument (used for
mas01cr@76 810 // dudCount). See adb__status() definition for the other half of
mas01cr@76 811 // this. -- CSR, 2007-10-01
mas01cr@0 812 void audioDB::ws_status(const char*dbName, char* hostport){
mas01cr@0 813 struct soap soap;
mas01cr@133 814 adb__statusResponse adbStatusResponse;
mas01cr@0 815
mas01cr@0 816 // Query an existing adb database
mas01cr@0 817 soap_init(&soap);
mas01cr@133 818 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) {
mas01cr@133 819 cout << "numFiles = " << adbStatusResponse.result.numFiles << endl;
mas01cr@133 820 cout << "dim = " << adbStatusResponse.result.dim << endl;
mas01cr@133 821 cout << "length = " << adbStatusResponse.result.length << endl;
mas01cr@133 822 cout << "dudCount = " << adbStatusResponse.result.dudCount << endl;
mas01cr@133 823 cout << "nullCount = " << adbStatusResponse.result.nullCount << endl;
mas01cr@133 824 cout << "flags = " << adbStatusResponse.result.flags << endl;
mas01cr@76 825 } else {
mas01cr@0 826 soap_print_fault(&soap,stderr);
mas01cr@76 827 }
mas01cr@0 828
mas01cr@0 829 soap_destroy(&soap);
mas01cr@0 830 soap_end(&soap);
mas01cr@0 831 soap_done(&soap);
mas01cr@0 832 }
mas01cr@0 833
mas01mc@18 834 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
mas01cr@0 835 struct soap soap;
mas01cr@133 836 adb__queryResponse adbQueryResponse;
mas01cr@0 837
mas01cr@0 838 soap_init(&soap);
mas01cr@0 839 if(soap_call_adb__query(&soap,hostport,NULL,
mas01mc@18 840 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
mas01cr@133 841 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){
mas01cr@133 842 //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl;
mas01cr@133 843 for(int i=0; i<adbQueryResponse.result.__sizeRlist; i++)
mas01cr@133 844 std::cout << adbQueryResponse.result.Rlist[i] << " " << adbQueryResponse.result.Dist[i]
mas01cr@133 845 << " " << adbQueryResponse.result.Qpos[i] << " " << adbQueryResponse.result.Spos[i] << std::endl;
mas01cr@0 846 }
mas01cr@0 847 else
mas01cr@0 848 soap_print_fault(&soap,stderr);
mas01cr@0 849
mas01cr@0 850 soap_destroy(&soap);
mas01cr@0 851 soap_end(&soap);
mas01cr@0 852 soap_done(&soap);
mas01cr@0 853
mas01cr@0 854 }
mas01cr@0 855
mas01cr@0 856
mas01cr@133 857 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 858 if(!dbH)
mas01cr@177 859 initTables(dbName, 0);
mas01cr@0 860
mas01cr@0 861 unsigned dudCount=0;
mas01cr@0 862 unsigned nullCount=0;
mas01cr@0 863 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 864 if(trackTable[k]<sequenceLength){
mas01cr@0 865 dudCount++;
mas01mc@18 866 if(!trackTable[k])
mas01cr@76 867 nullCount++;
mas01cr@0 868 }
mas01cr@0 869 }
mas01cr@76 870
mas01cr@133 871 if(adbStatusResponse == 0) {
mas01cr@76 872
mas01cr@76 873 // Update Header information
mas01cr@76 874 cout << "num files:" << dbH->numFiles << endl;
mas01cr@76 875 cout << "data dim:" << dbH->dim <<endl;
mas01cr@76 876 if(dbH->dim>0){
mas01cr@76 877 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
mas01cr@108 878 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
mas01cr@76 879 }
mas01cr@108 880 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
mas01cr@108 881 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01cr@108 882 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
mas01cr@76 883 cout << "flags:" << dbH->flags << endl;
mas01cr@76 884
mas01cr@76 885 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
mas01cr@76 886 } else {
mas01cr@133 887 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 888 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 889 adbStatusResponse->result.length = dbH->length;
mas01cr@133 890 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 891 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 892 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 893 }
mas01cr@0 894 }
mas01cr@0 895
mas01cr@0 896 void audioDB::dump(const char* dbName){
mas01cr@131 897 if(!dbH) {
mas01cr@177 898 initTables(dbName, 0);
mas01mc@17 899 }
mas01cr@0 900
mas01cr@131 901 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
mas01cr@131 902 error("error making output directory", output, "mkdir");
mas01cr@131 903 }
mas01cr@131 904
mas01cr@131 905 char *cwd = new char[PATH_MAX];
mas01cr@131 906
mas01cr@131 907 if ((getcwd(cwd, PATH_MAX)) == 0) {
mas01cr@131 908 error("error getting working directory", "", "getcwd");
mas01cr@131 909 }
mas01cr@131 910
mas01cr@131 911 if((chdir(output)) < 0) {
mas01cr@131 912 error("error changing working directory", output, "chdir");
mas01cr@131 913 }
mas01cr@131 914
mas01cr@131 915 int fLfd, tLfd = 0, kLfd;
mas01cr@131 916 FILE *fLFile, *tLFile = 0, *kLFile;
mas01cr@131 917
mas01cr@131 918 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 919 error("error creating featureList file", "featureList.txt", "open");
mas01cr@131 920 }
mas01cr@131 921 int times = dbH->flags & O2_FLAG_TIMES;
mas01cr@131 922 if (times) {
mas01cr@131 923 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 924 error("error creating timesList file", "timesList.txt", "open");
mas01cr@131 925 }
mas01cr@131 926 }
mas01cr@131 927 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 928 error("error creating keyList file", "keyList.txt", "open");
mas01cr@131 929 }
mas01cr@131 930
mas01cr@131 931 /* can these fail? I sincerely hope not. */
mas01cr@131 932 fLFile = fdopen(fLfd, "w");
mas01cr@131 933 if (times) {
mas01cr@131 934 tLFile = fdopen(tLfd, "w");
mas01cr@131 935 }
mas01cr@131 936 kLFile = fdopen(kLfd, "w");
mas01cr@131 937
mas01cr@131 938 char *fName = new char[256];
mas01cr@131 939 int ffd;
mas01cr@131 940 FILE *tFile;
mas01cr@131 941 unsigned pos = 0;
mas01cr@184 942 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@184 943 double *data_buffer;
mas01cr@184 944 size_t data_buffer_size;
mas01cr@131 945 for(unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@131 946 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE);
mas01cr@131 947 snprintf(fName, 256, "%05d.features", k);
mas01cr@131 948 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 949 error("error creating feature file", fName, "open");
mas01cr@131 950 }
mas01cr@131 951 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
mas01cr@131 952 error("error writing dimensions", fName, "write");
mas01cr@131 953 }
mas01cr@184 954
mas01cr@184 955 /* FIXME: this repeated malloc()/free() of data buffers is
mas01cr@184 956 inefficient. */
mas01cr@184 957 data_buffer_size = trackTable[k] * dbH->dim * sizeof(double);
mas01cr@184 958
mas01cr@184 959 {
mas01cr@184 960 void *tmp = malloc(data_buffer_size);
mas01cr@184 961 if (tmp == NULL) {
mas01cr@184 962 error("error allocating data buffer");
mas01cr@184 963 }
mas01cr@184 964 data_buffer = (double *) tmp;
mas01cr@184 965 }
mas01cr@184 966
mas01cr@184 967 if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
mas01cr@184 968 error("error reading data", fName, "read");
mas01cr@184 969 }
mas01cr@184 970
mas01cr@184 971 if ((write(ffd, data_buffer, data_buffer_size)) < 0) {
mas01cr@131 972 error("error writing data", fName, "write");
mas01cr@131 973 }
mas01cr@184 974
mas01cr@184 975 free(data_buffer);
mas01cr@184 976
mas01cr@131 977 fprintf(fLFile, "%s\n", fName);
mas01cr@131 978 close(ffd);
mas01cr@131 979
mas01cr@131 980 if(times) {
mas01cr@131 981 snprintf(fName, 256, "%05d.times", k);
mas01cr@131 982 tFile = fopen(fName, "w");
mas01cr@131 983 for(unsigned i = 0; i < trackTable[k]; i++) {
mas01cr@131 984 // KLUDGE: specifying 16 digits of precision after the decimal
mas01cr@131 985 // point is (but check this!) sufficient to uniquely identify
mas01cr@131 986 // doubles; however, that will cause ugliness, as that's
mas01cr@131 987 // vastly too many for most values of interest. Moving to %a
mas01cr@131 988 // here and scanf() in the timesFile reading might fix this.
mas01cr@131 989 // -- CSR, 2007-10-19
mas01cr@131 990 fprintf(tFile, "%.16e\n", *(timesTable + pos + i));
mas01cr@131 991 }
mas01cr@131 992 fprintf(tLFile, "%s\n", fName);
mas01cr@131 993 }
mas01cr@131 994
mas01cr@131 995 pos += trackTable[k];
mas01cr@131 996 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
mas01cr@131 997 }
mas01cr@131 998
mas01cr@131 999 FILE *scriptFile;
mas01cr@131 1000 scriptFile = fopen("restore.sh", "w");
mas01cr@131 1001 fprintf(scriptFile, "\
mas01cr@131 1002 #! /bin/sh\n\
mas01cr@131 1003 #\n\
mas01cr@131 1004 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
mas01cr@131 1005 \n\
mas01cr@131 1006 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
mas01cr@131 1007 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
mas01cr@131 1008 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000);
mas01cr@131 1009 if(dbH->flags & O2_FLAG_L2NORM) {
mas01cr@131 1010 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
mas01cr@131 1011 }
mas01cr@131 1012 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
mas01cr@131 1013 if(times) {
mas01cr@131 1014 fprintf(scriptFile, " -T timesList.txt");
mas01cr@131 1015 }
mas01cr@131 1016 fprintf(scriptFile, "\n");
mas01cr@131 1017 fclose(scriptFile);
mas01cr@131 1018
mas01cr@131 1019 if((chdir(cwd)) < 0) {
mas01cr@131 1020 error("error changing working directory", cwd, "chdir");
mas01cr@131 1021 }
mas01cr@131 1022
mas01cr@131 1023 fclose(fLFile);
mas01cr@131 1024 if(times) {
mas01cr@131 1025 fclose(tLFile);
mas01cr@131 1026 }
mas01cr@131 1027 fclose(kLFile);
mas01cr@131 1028 delete[] fName;
mas01cr@131 1029
mas01cr@0 1030 status(dbName);
mas01cr@0 1031 }
mas01cr@0 1032
mas01cr@177 1033 void audioDB::l2norm(const char* dbName) {
mas01cr@177 1034 forWrite = true;
mas01cr@177 1035 initTables(dbName, 0);
mas01cr@0 1036 if(dbH->length>0){
mas01cr@0 1037 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@0 1038 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 1039 }
mas01cr@0 1040 // Update database flags
mas01cr@0 1041 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 1042 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 1043 }
mas01cr@0 1044
mas01cr@133 1045 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@0 1046 switch(queryType){
mas01cr@105 1047 case O2_POINT_QUERY:
mas01cr@133 1048 pointQuery(dbName, inFile, adbQueryResponse);
mas01cr@0 1049 break;
mas01cr@105 1050 case O2_SEQUENCE_QUERY:
mas01mc@17 1051 if(radius==0)
mas01cr@133 1052 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
mas01mc@17 1053 else
mas01cr@133 1054 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
mas01cr@0 1055 break;
mas01cr@105 1056 case O2_TRACK_QUERY:
mas01cr@133 1057 trackPointQuery(dbName, inFile, adbQueryResponse);
mas01cr@0 1058 break;
mas01cr@0 1059 default:
mas01cr@0 1060 error("unrecognized queryType in query()");
mas01cr@0 1061
mas01cr@0 1062 }
mas01cr@0 1063 }
mas01cr@0 1064
mas01cr@0 1065 //return ordinal position of key in keyTable
mas01cr@0 1066 unsigned audioDB::getKeyPos(char* key){
mas01cr@0 1067 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 1068 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@0 1069 return k;
mas01cr@0 1070 error("Key not found",key);
mas01cr@0 1071 return O2_ERR_KEYNOTFOUND;
mas01cr@0 1072 }
mas01cr@0 1073
mas01cr@0 1074 // Basic point query engine
mas01cr@177 1075 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@0 1076
mas01cr@177 1077 initTables(dbName, inFile);
mas01cr@0 1078
mas01cr@0 1079 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1080 // we use stdout in this stub version
mas01cr@0 1081 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1082
mas01cr@0 1083 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1084 double* data = dataBuf;
mas01cr@0 1085 double* queryCopy = 0;
mas01cr@0 1086
mas01cr@0 1087 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1088 // Make a copy of the query
mas01cr@0 1089 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1090 qNorm = new double[numVectors];
mas01cr@0 1091 assert(queryCopy&&qNorm);
mas01cr@0 1092 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1093 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1094 query = queryCopy;
mas01cr@0 1095 }
mas01cr@0 1096
mas01cr@0 1097 // Make temporary dynamic memory for results
mas01cr@0 1098 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 1099 double distances[pointNN];
mas01cr@0 1100 unsigned qIndexes[pointNN];
mas01cr@0 1101 unsigned sIndexes[pointNN];
mas01cr@0 1102 for(unsigned k=0; k<pointNN; k++){
mas01cr@62 1103 distances[k]=-DBL_MAX;
mas01cr@0 1104 qIndexes[k]=~0;
mas01cr@0 1105 sIndexes[k]=~0;
mas01cr@0 1106 }
mas01cr@0 1107
mas01cr@0 1108 unsigned j=numVectors;
mas01cr@0 1109 unsigned k,l,n;
mas01cr@0 1110 double thisDist;
mas01cr@0 1111
mas01cr@0 1112 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
mas01cr@0 1113 double meanQdur = 0;
mas01cr@0 1114 double* timesdata = 0;
mas01cr@0 1115 double* dbdurs = 0;
mas01cr@0 1116
mas01cr@0 1117 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1118 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1119 usingTimes=0;
mas01cr@0 1120 }
mas01cr@0 1121
mas01cr@0 1122 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1123 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1124
mas01cr@0 1125 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1126 timesdata = new double[numVectors];
mas01cr@0 1127 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1128 // Calculate durations of points
mas01cr@0 1129 for(k=0; k<numVectors-1; k++){
mas01cr@0 1130 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1131 meanQdur+=timesdata[k];
mas01cr@0 1132 }
mas01cr@0 1133 meanQdur/=k;
mas01cr@0 1134 // Individual exhaustive timepoint durations
mas01cr@0 1135 dbdurs = new double[totalVecs];
mas01cr@0 1136 for(k=0; k<totalVecs-1; k++)
mas01cr@0 1137 dbdurs[k]=timesTable[k+1]-timesTable[k];
mas01cr@0 1138 j--; // decrement vector counter by one
mas01cr@0 1139 }
mas01cr@0 1140
mas01cr@0 1141 if(usingQueryPoint)
mas01cr@0 1142 if(queryPoint>numVectors-1)
mas01cr@0 1143 error("queryPoint > numVectors in query");
mas01cr@0 1144 else{
mas01cr@60 1145 if(verbosity>1) {
mas01cr@0 1146 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1147 }
mas01cr@0 1148 query=query+queryPoint*dbH->dim;
mas01cr@0 1149 numVectors=queryPoint+1;
mas01cr@0 1150 j=1;
mas01cr@0 1151 }
mas01cr@0 1152
mas01cr@0 1153 gettimeofday(&tv1, NULL);
mas01cr@0 1154 while(j--){ // query
mas01cr@0 1155 data=dataBuf;
mas01cr@0 1156 k=totalVecs; // number of database vectors
mas01cr@0 1157 while(k--){ // database
mas01cr@0 1158 thisDist=0;
mas01cr@0 1159 l=dbH->dim;
mas01cr@0 1160 double* q=query;
mas01cr@0 1161 while(l--)
mas01cr@0 1162 thisDist+=*q++**data++;
mas01cr@0 1163 if(!usingTimes ||
mas01cr@0 1164 (usingTimes
mas01cr@0 1165 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
mas01cr@0 1166 n=pointNN;
mas01cr@0 1167 while(n--){
mas01cr@0 1168 if(thisDist>=distances[n]){
mas01cr@0 1169 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1170 // Copy all values above up the queue
mas01cr@0 1171 for( l=pointNN-1 ; l >= n+1 ; l--){
mas01cr@0 1172 distances[l]=distances[l-1];
mas01cr@0 1173 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1174 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1175 }
mas01cr@0 1176 distances[n]=thisDist;
mas01cr@0 1177 qIndexes[n]=numVectors-j-1;
mas01cr@0 1178 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
mas01cr@0 1179 break;
mas01cr@0 1180 }
mas01cr@0 1181 }
mas01cr@0 1182 else
mas01cr@0 1183 break;
mas01cr@0 1184 }
mas01cr@0 1185 }
mas01cr@0 1186 }
mas01cr@0 1187 // Move query pointer to next query point
mas01cr@0 1188 query+=dbH->dim;
mas01cr@0 1189 }
mas01cr@0 1190
mas01cr@0 1191 gettimeofday(&tv2, NULL);
mas01cr@60 1192 if(verbosity>1) {
mas01cr@0 1193 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@60 1194 }
mas01cr@0 1195
mas01cr@133 1196 if(adbQueryResponse==0){
mas01cr@0 1197 // Output answer
mas01cr@0 1198 // Loop over nearest neighbours
mas01cr@0 1199 for(k=0; k < pointNN; k++){
mas01cr@0 1200 // Scan for key
mas01mc@18 1201 unsigned cumTrack=0;
mas01cr@0 1202 for(l=0 ; l<dbH->numFiles; l++){
mas01mc@18 1203 cumTrack+=trackTable[l];
mas01mc@18 1204 if(sIndexes[k]<cumTrack){
mas01cr@0 1205 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
mas01mc@18 1206 << sIndexes[k]+trackTable[l]-cumTrack << endl;
mas01cr@0 1207 break;
mas01cr@0 1208 }
mas01cr@0 1209 }
mas01cr@0 1210 }
mas01cr@0 1211 }
mas01cr@0 1212 else{ // Process Web Services Query
mas01cr@89 1213 int listLen;
mas01cr@89 1214 for(k = 0; k < pointNN; k++) {
mas01cr@89 1215 if(distances[k] == -DBL_MAX)
mas01cr@89 1216 break;
mas01cr@89 1217 }
mas01cr@89 1218 listLen = k;
mas01cr@89 1219
mas01cr@133 1220 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 1221 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 1222 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 1223 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 1224 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 1225 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 1226 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 1227 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 1228 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 1229 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 1230 adbQueryResponse->result.Dist[k]=distances[k];
mas01cr@133 1231 adbQueryResponse->result.Qpos[k]=qIndexes[k];
mas01mc@18 1232 unsigned cumTrack=0;
mas01cr@0 1233 for(l=0 ; l<dbH->numFiles; l++){
mas01mc@18 1234 cumTrack+=trackTable[l];
mas01mc@18 1235 if(sIndexes[k]<cumTrack){
mas01cr@133 1236 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
mas01cr@0 1237 break;
mas01cr@0 1238 }
mas01cr@0 1239 }
mas01cr@133 1240 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
mas01cr@0 1241 }
mas01cr@0 1242 }
mas01cr@0 1243
mas01cr@0 1244 // Clean up
mas01cr@0 1245 if(queryCopy)
mas01cr@0 1246 delete queryCopy;
mas01cr@0 1247 if(qNorm)
mas01cr@0 1248 delete qNorm;
mas01cr@0 1249 if(timesdata)
mas01cr@0 1250 delete timesdata;
mas01cr@0 1251 if(dbdurs)
mas01cr@0 1252 delete dbdurs;
mas01cr@0 1253 }
mas01cr@0 1254
mas01mc@18 1255 // trackPointQuery
mas01mc@18 1256 // return the trackNN closest tracks to the query track
mas01mc@18 1257 // uses average of pointNN points per track
mas01cr@177 1258 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@177 1259 initTables(dbName, inFile);
mas01cr@0 1260
mas01cr@0 1261 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1262 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1263 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1264 double* data = dataBuf;
mas01cr@0 1265 double* queryCopy = 0;
mas01cr@0 1266
mas01cr@0 1267 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1268 // Make a copy of the query
mas01cr@0 1269 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1270 qNorm = new double[numVectors];
mas01cr@0 1271 assert(queryCopy&&qNorm);
mas01cr@0 1272 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1273 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1274 query = queryCopy;
mas01cr@0 1275 }
mas01cr@0 1276
mas01cr@0 1277 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 1278 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1279
mas01cr@0 1280 // Make temporary dynamic memory for results
mas01mc@18 1281 double trackDistances[trackNN];
mas01mc@18 1282 unsigned trackIDs[trackNN];
mas01mc@18 1283 unsigned trackQIndexes[trackNN];
mas01mc@18 1284 unsigned trackSIndexes[trackNN];
mas01cr@0 1285
mas01cr@0 1286 double distances[pointNN];
mas01cr@0 1287 unsigned qIndexes[pointNN];
mas01cr@0 1288 unsigned sIndexes[pointNN];
mas01cr@0 1289
mas01cr@0 1290 unsigned j=numVectors; // number of query points
mas01mc@18 1291 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
mas01cr@0 1292 double thisDist;
mas01cr@0 1293
mas01cr@0 1294 for(k=0; k<pointNN; k++){
mas01cr@66 1295 distances[k]=-DBL_MAX;
mas01cr@0 1296 qIndexes[k]=~0;
mas01cr@0 1297 sIndexes[k]=~0;
mas01cr@0 1298 }
mas01cr@0 1299
mas01mc@18 1300 for(k=0; k<trackNN; k++){
mas01cr@66 1301 trackDistances[k]=-DBL_MAX;
mas01mc@18 1302 trackQIndexes[k]=~0;
mas01mc@18 1303 trackSIndexes[k]=~0;
mas01mc@18 1304 trackIDs[k]=~0;
mas01cr@0 1305 }
mas01cr@0 1306
mas01cr@0 1307 double meanQdur = 0;
mas01cr@0 1308 double* timesdata = 0;
mas01cr@0 1309 double* meanDBdur = 0;
mas01cr@0 1310
mas01cr@0 1311 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1312 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1313 usingTimes=0;
mas01cr@0 1314 }
mas01cr@0 1315
mas01cr@0 1316 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1317 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1318
mas01cr@0 1319 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1320 timesdata = new double[numVectors];
mas01cr@0 1321 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1322 // Calculate durations of points
mas01cr@0 1323 for(k=0; k<numVectors-1; k++){
mas01cr@0 1324 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1325 meanQdur+=timesdata[k];
mas01cr@0 1326 }
mas01cr@0 1327 meanQdur/=k;
mas01cr@0 1328 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1329 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1330 meanDBdur[k]=0.0;
mas01mc@18 1331 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1332 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1333 meanDBdur[k]/=j;
mas01cr@0 1334 }
mas01cr@0 1335 }
mas01cr@0 1336
mas01cr@0 1337 if(usingQueryPoint)
mas01cr@0 1338 if(queryPoint>numVectors-1)
mas01cr@0 1339 error("queryPoint > numVectors in query");
mas01cr@0 1340 else{
mas01cr@60 1341 if(verbosity>1) {
mas01cr@0 1342 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1343 }
mas01cr@0 1344 query=query+queryPoint*dbH->dim;
mas01cr@0 1345 numVectors=queryPoint+1;
mas01cr@0 1346 }
mas01cr@0 1347
mas01mc@18 1348 // build track offset table
mas01mc@18 1349 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 1350 unsigned cumTrack=0;
mas01mc@18 1351 unsigned trackIndexOffset;
mas01cr@0 1352 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 1353 trackOffsetTable[k]=cumTrack;
mas01mc@18 1354 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1355 }
mas01cr@0 1356
mas01cr@0 1357 char nextKey[MAXSTR];
mas01cr@0 1358
mas01cr@0 1359 gettimeofday(&tv1, NULL);
mas01cr@0 1360
mas01mc@18 1361 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01mc@18 1362 if(trackFile){
mas01mc@18 1363 if(!trackFile->eof()){
mas01mc@18 1364 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 1365 track=getKeyPos(nextKey);
mas01cr@0 1366 }
mas01cr@0 1367 else
mas01cr@0 1368 break;
mas01cr@0 1369 }
mas01mc@18 1370 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 1371 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@60 1372 if(verbosity>7) {
mas01mc@18 1373 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 1374 }
mas01cr@0 1375
mas01cr@0 1376 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 1377 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
mas01cr@0 1378 else
mas01cr@0 1379 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
mas01cr@0 1380 if(usingQueryPoint)
mas01cr@0 1381 j=1;
mas01cr@0 1382 else
mas01cr@0 1383 j=numVectors;
mas01cr@0 1384 while(j--){
mas01mc@18 1385 k=trackTable[track]; // number of vectors in track
mas01mc@18 1386 data=dataBuf+trackOffset; // data for track
mas01cr@0 1387 while(k--){
mas01cr@0 1388 thisDist=0;
mas01cr@0 1389 l=dbH->dim;
mas01cr@0 1390 double* q=query;
mas01cr@0 1391 while(l--)
mas01cr@0 1392 thisDist+=*q++**data++;
mas01cr@0 1393 if(!usingTimes ||
mas01cr@0 1394 (usingTimes
mas01mc@18 1395 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1396 n=pointNN;
mas01cr@0 1397 while(n--){
mas01cr@0 1398 if(thisDist>=distances[n]){
mas01cr@0 1399 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1400 // Copy all values above up the queue
mas01cr@0 1401 for( l=pointNN-1 ; l > n ; l--){
mas01cr@0 1402 distances[l]=distances[l-1];
mas01cr@0 1403 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1404 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1405 }
mas01cr@0 1406 distances[n]=thisDist;
mas01cr@0 1407 qIndexes[n]=numVectors-j-1;
mas01mc@18 1408 sIndexes[n]=trackTable[track]-k-1;
mas01cr@0 1409 break;
mas01cr@0 1410 }
mas01cr@0 1411 }
mas01cr@0 1412 else
mas01cr@0 1413 break;
mas01cr@0 1414 }
mas01cr@0 1415 }
mas01mc@18 1416 } // track
mas01cr@0 1417 // Move query pointer to next query point
mas01cr@0 1418 query+=dbH->dim;
mas01cr@0 1419 } // query
mas01mc@18 1420 // Take the average of this track's distance
mas01mc@18 1421 // Test the track distances
mas01cr@0 1422 thisDist=0;
mas01cr@66 1423 for (n = 0; n < pointNN; n++) {
mas01cr@66 1424 if (distances[n] == -DBL_MAX) break;
mas01cr@66 1425 thisDist += distances[n];
mas01cr@66 1426 }
mas01cr@66 1427 thisDist /= n;
mas01cr@66 1428
mas01mc@18 1429 n=trackNN;
mas01cr@0 1430 while(n--){
mas01mc@18 1431 if(thisDist>=trackDistances[n]){
mas01mc@18 1432 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@0 1433 // Copy all values above up the queue
mas01cr@74 1434 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 1435 trackDistances[l]=trackDistances[l-1];
mas01mc@18 1436 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 1437 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 1438 trackIDs[l]=trackIDs[l-1];
mas01cr@0 1439 }
mas01mc@18 1440 trackDistances[n]=thisDist;
mas01mc@18 1441 trackQIndexes[n]=qIndexes[0];
mas01mc@18 1442 trackSIndexes[n]=sIndexes[0];
mas01mc@18 1443 trackIDs[n]=track;
mas01cr@0 1444 break;
mas01cr@0 1445 }
mas01cr@0 1446 }
mas01cr@0 1447 else
mas01cr@0 1448 break;
mas01cr@0 1449 }
mas01cr@0 1450 for(unsigned k=0; k<pointNN; k++){
mas01cr@66 1451 distances[k]=-DBL_MAX;
mas01cr@0 1452 qIndexes[k]=~0;
mas01cr@0 1453 sIndexes[k]=~0;
mas01cr@0 1454 }
mas01mc@18 1455 } // tracks
mas01cr@0 1456 gettimeofday(&tv2, NULL);
mas01cr@0 1457
mas01cr@60 1458 if(verbosity>1) {
mas01mc@18 1459 cerr << endl << "processed tracks :" << processedTracks
mas01cr@0 1460 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@60 1461 }
mas01cr@0 1462
mas01cr@133 1463 if(adbQueryResponse==0){
mas01cr@60 1464 if(verbosity>1) {
mas01cr@0 1465 cerr<<endl;
mas01cr@60 1466 }
mas01cr@0 1467 // Output answer
mas01cr@0 1468 // Loop over nearest neighbours
mas01mc@18 1469 for(k=0; k < min(trackNN,processedTracks); k++)
mas01mc@18 1470 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
mas01mc@18 1471 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 1472 }
mas01cr@0 1473 else{ // Process Web Services Query
mas01mc@18 1474 int listLen = min(trackNN, processedTracks);
mas01cr@133 1475 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 1476 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 1477 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 1478 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 1479 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 1480 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 1481 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 1482 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 1483 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 1484 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 1485 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 1486 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 1487 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 1488 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1489 }
mas01cr@0 1490 }
mas01cr@0 1491
mas01cr@0 1492
mas01cr@0 1493 // Clean up
mas01mc@18 1494 if(trackOffsetTable)
mas01mc@18 1495 delete trackOffsetTable;
mas01cr@0 1496 if(queryCopy)
mas01cr@0 1497 delete queryCopy;
mas01cr@0 1498 if(qNorm)
mas01cr@0 1499 delete qNorm;
mas01cr@0 1500 if(timesdata)
mas01cr@0 1501 delete timesdata;
mas01cr@0 1502 if(meanDBdur)
mas01cr@0 1503 delete meanDBdur;
mas01cr@0 1504
mas01cr@0 1505 }
mas01cr@0 1506
mas01cr@0 1507
mas01mc@20 1508 // k nearest-neighbor (k-NN) search between query and target tracks
mas01mc@20 1509 // efficient implementation based on matched filter
mas01mc@20 1510 // assumes normed shingles
mas01mc@20 1511 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
mas01cr@133 1512 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@0 1513
mas01cr@177 1514 initTables(dbName, inFile);
mas01cr@0 1515
mas01cr@0 1516 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1517 // we use stdout in this stub version
mas01cr@0 1518 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1519 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1520 double* queryCopy = 0;
mas01cr@0 1521
mas01cr@0 1522 double qMeanL2;
mas01cr@0 1523 double* sMeanL2;
mas01cr@0 1524
mas01cr@0 1525 unsigned USE_THRESH=0;
mas01cr@0 1526 double SILENCE_THRESH=0;
mas01cr@0 1527 double DIFF_THRESH=0;
mas01cr@0 1528
mas01cr@0 1529 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@55 1530 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@55 1531
mas01cr@55 1532 if(numVectors<sequenceLength)
mas01cr@55 1533 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@0 1534
mas01cr@60 1535 if(verbosity>1) {
mas01cr@0 1536 cerr << "performing norms ... "; cerr.flush();
mas01cr@60 1537 }
mas01cr@0 1538 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01mc@20 1539
mas01cr@0 1540 // Make a copy of the query
mas01cr@0 1541 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1542 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1543 qNorm = new double[numVectors];
mas01cr@0 1544 sNorm = new double[dbVectors];
mas01cr@0 1545 sMeanL2=new double[dbH->numFiles];
mas01cr@0 1546 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
mas01cr@0 1547 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1548 query = queryCopy;
mas01mc@20 1549
mas01cr@0 1550 // Make norm measurements relative to sequenceLength
mas01cr@0 1551 unsigned w = sequenceLength-1;
mas01cr@0 1552 unsigned i,j;
mas01cr@0 1553 double* ps;
mas01cr@0 1554 double tmp1,tmp2;
mas01mc@20 1555
mas01cr@0 1556 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@0 1557 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@125 1558 double* qnPtr = qNorm;
mas01cr@0 1559 double* snPtr = sNorm;
mas01cr@0 1560 for(i=0; i<dbH->numFiles; i++){
mas01mc@20 1561 if(trackTable[i]>=sequenceLength){
mas01cr@0 1562 tmp1=*snPtr;
mas01cr@0 1563 j=1;
mas01cr@0 1564 w=sequenceLength-1;
mas01cr@0 1565 while(w--)
mas01cr@0 1566 *snPtr+=snPtr[j++];
mas01cr@0 1567 ps = snPtr+1;
mas01mc@18 1568 w=trackTable[i]-sequenceLength; // +1 - 1
mas01cr@0 1569 while(w--){
mas01cr@0 1570 tmp2=*ps;
mas01mc@20 1571 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@0 1572 tmp1=tmp2;
mas01cr@0 1573 ps++;
mas01cr@0 1574 }
mas01mc@20 1575 ps = snPtr;
mas01mc@20 1576 w=trackTable[i]-sequenceLength+1;
mas01mc@20 1577 while(w--){
mas01mc@20 1578 *ps=sqrt(*ps);
mas01mc@20 1579 ps++;
mas01mc@20 1580 }
mas01cr@0 1581 }
mas01mc@18 1582 snPtr+=trackTable[i];
mas01cr@0 1583 }
mas01cr@0 1584
mas01cr@0 1585 double* pn = sMeanL2;
mas01cr@0 1586 w=dbH->numFiles;
mas01cr@0 1587 while(w--)
mas01cr@0 1588 *pn++=0.0;
mas01cr@0 1589 ps=sNorm;
mas01mc@18 1590 unsigned processedTracks=0;
mas01cr@0 1591 for(i=0; i<dbH->numFiles; i++){
mas01mc@18 1592 if(trackTable[i]>sequenceLength-1){
mas01cr@57 1593 w = trackTable[i]-sequenceLength+1;
mas01cr@0 1594 pn = sMeanL2+i;
mas01mc@20 1595 *pn=0;
mas01cr@0 1596 while(w--)
mas01mc@20 1597 if(*ps>0)
mas01mc@20 1598 *pn+=*ps++;
mas01cr@57 1599 *pn/=trackTable[i]-sequenceLength+1;
mas01cr@0 1600 SILENCE_THRESH+=*pn;
mas01mc@18 1601 processedTracks++;
mas01cr@0 1602 }
mas01mc@18 1603 ps = sNorm + trackTable[i];
mas01cr@0 1604 }
mas01cr@60 1605 if(verbosity>1) {
mas01mc@18 1606 cerr << "processedTracks: " << processedTracks << endl;
mas01cr@60 1607 }
mas01mc@20 1608
mas01mc@18 1609 SILENCE_THRESH/=processedTracks;
mas01cr@0 1610 USE_THRESH=1; // Turn thresholding on
mas01mc@20 1611 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
mas01mc@20 1612 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
mas01cr@60 1613 if(verbosity>4) {
mas01mc@20 1614 cerr << "silence thresh: " << SILENCE_THRESH;
mas01cr@60 1615 }
mas01cr@0 1616 w=sequenceLength-1;
mas01cr@0 1617 i=1;
mas01cr@125 1618 tmp1=*qnPtr;
mas01cr@0 1619 while(w--)
mas01cr@125 1620 *qnPtr+=qnPtr[i++];
mas01cr@125 1621 ps = qnPtr+1;
mas01mc@20 1622 w=numVectors-sequenceLength; // +1 -1
mas01cr@0 1623 while(w--){
mas01cr@0 1624 tmp2=*ps;
mas01mc@20 1625 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01cr@0 1626 tmp1=tmp2;
mas01mc@20 1627 ps++;
mas01mc@20 1628 }
mas01cr@125 1629 ps = qnPtr;
mas01mc@20 1630 qMeanL2 = 0;
mas01mc@20 1631 w=numVectors-sequenceLength+1;
mas01mc@20 1632 while(w--){
mas01mc@20 1633 *ps=sqrt(*ps);
mas01mc@20 1634 qMeanL2+=*ps++;
mas01cr@0 1635 }
mas01cr@0 1636 qMeanL2 /= numVectors-sequenceLength+1;
mas01mc@20 1637
mas01cr@60 1638 if(verbosity>1) {
mas01cr@60 1639 cerr << "done." << endl;
mas01cr@60 1640 }
mas01cr@0 1641
mas01cr@60 1642 if(verbosity>1) {
mas01mc@18 1643 cerr << "matching tracks..." << endl;
mas01cr@60 1644 }
mas01cr@0 1645
mas01cr@0 1646 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 1647 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1648
mas01cr@0 1649 // Make temporary dynamic memory for results
mas01mc@18 1650 double trackDistances[trackNN];
mas01mc@18 1651 unsigned trackIDs[trackNN];
mas01mc@18 1652 unsigned trackQIndexes[trackNN];
mas01mc@18 1653 unsigned trackSIndexes[trackNN];
mas01cr@0 1654
mas01cr@0 1655 double distances[pointNN];
mas01cr@0 1656 unsigned qIndexes[pointNN];
mas01cr@0 1657 unsigned sIndexes[pointNN];
mas01cr@0 1658
mas01cr@0 1659
mas01mc@18 1660 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@0 1661 double thisDist;
mas01cr@0 1662
mas01cr@0 1663 for(k=0; k<pointNN; k++){
mas01mc@20 1664 distances[k]=1.0e6;
mas01cr@0 1665 qIndexes[k]=~0;
mas01cr@0 1666 sIndexes[k]=~0;
mas01cr@0 1667 }
mas01cr@0 1668
mas01mc@18 1669 for(k=0; k<trackNN; k++){
mas01mc@20 1670 trackDistances[k]=1.0e6;
mas01mc@18 1671 trackQIndexes[k]=~0;
mas01mc@18 1672 trackSIndexes[k]=~0;
mas01mc@18 1673 trackIDs[k]=~0;
mas01cr@0 1674 }
mas01cr@0 1675
mas01cr@0 1676 // Timestamp and durations processing
mas01cr@0 1677 double meanQdur = 0;
mas01cr@0 1678 double* timesdata = 0;
mas01cr@0 1679 double* meanDBdur = 0;
mas01cr@0 1680
mas01cr@0 1681 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1682 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1683 usingTimes=0;
mas01cr@0 1684 }
mas01cr@0 1685
mas01cr@0 1686 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1687 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1688
mas01cr@0 1689 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1690 timesdata = new double[numVectors];
mas01cr@0 1691 assert(timesdata);
mas01cr@0 1692 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1693 // Calculate durations of points
mas01cr@0 1694 for(k=0; k<numVectors-1; k++){
mas01cr@0 1695 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1696 meanQdur+=timesdata[k];
mas01cr@0 1697 }
mas01cr@0 1698 meanQdur/=k;
mas01cr@60 1699 if(verbosity>1) {
mas01cr@0 1700 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@60 1701 }
mas01cr@0 1702 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1703 assert(meanDBdur);
mas01cr@0 1704 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1705 meanDBdur[k]=0.0;
mas01mc@18 1706 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1707 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1708 meanDBdur[k]/=j;
mas01cr@0 1709 }
mas01cr@0 1710 }
mas01cr@0 1711
mas01cr@0 1712 if(usingQueryPoint)
mas01cr@0 1713 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@0 1714 error("queryPoint > numVectors-wL+1 in query");
mas01cr@0 1715 else{
mas01cr@60 1716 if(verbosity>1) {
mas01cr@0 1717 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1718 }
mas01cr@0 1719 query=query+queryPoint*dbH->dim;
mas01cr@125 1720 qnPtr=qnPtr+queryPoint;
mas01cr@0 1721 numVectors=wL;
mas01cr@0 1722 }
mas01cr@0 1723
mas01mc@20 1724 double ** D = 0; // Differences query and target
mas01cr@0 1725 double ** DD = 0; // Matched filter distance
mas01cr@0 1726
mas01cr@0 1727 D = new double*[numVectors];
mas01cr@0 1728 assert(D);
mas01cr@0 1729 DD = new double*[numVectors];
mas01cr@0 1730 assert(DD);
mas01cr@0 1731
mas01cr@0 1732 gettimeofday(&tv1, NULL);
mas01mc@18 1733 processedTracks=0;
mas01mc@18 1734 unsigned successfulTracks=0;
mas01cr@0 1735
mas01cr@0 1736 double* qp;
mas01cr@0 1737 double* sp;
mas01cr@0 1738 double* dp;
mas01cr@0 1739
mas01mc@18 1740 // build track offset table
mas01mc@18 1741 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 1742 unsigned cumTrack=0;
mas01mc@18 1743 unsigned trackIndexOffset;
mas01cr@0 1744 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 1745 trackOffsetTable[k]=cumTrack;
mas01mc@18 1746 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1747 }
mas01cr@0 1748
mas01cr@0 1749 char nextKey [MAXSTR];
mas01mc@20 1750
mas01mc@20 1751 // chi^2 statistics
mas01mc@20 1752 double sampleCount = 0;
mas01mc@20 1753 double sampleSum = 0;
mas01mc@20 1754 double logSampleSum = 0;
mas01mc@20 1755 double minSample = 1e9;
mas01mc@20 1756 double maxSample = 0;
mas01mc@20 1757
mas01mc@20 1758 // Track loop
mas01cr@179 1759 size_t data_buffer_size = 0;
mas01cr@179 1760 double *data_buffer = 0;
mas01cr@179 1761 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@179 1762
mas01cr@179 1763 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@0 1764
mas01mc@18 1765 // get trackID from file if using a control file
mas01mc@18 1766 if(trackFile){
mas01mc@18 1767 if(!trackFile->eof()){
mas01mc@18 1768 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 1769 track=getKeyPos(nextKey);
mas01cr@0 1770 }
mas01cr@0 1771 else
mas01cr@0 1772 break;
mas01cr@0 1773 }
mas01mc@12 1774
mas01mc@18 1775 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 1776 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@0 1777
mas01cr@57 1778 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@0 1779
mas01cr@60 1780 if(verbosity>7) {
mas01mc@18 1781 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 1782 }
mas01cr@0 1783
mas01mc@20 1784 // Sum products matrix
mas01cr@0 1785 for(j=0; j<numVectors;j++){
mas01mc@18 1786 D[j]=new double[trackTable[track]];
mas01cr@0 1787 assert(D[j]);
mas01cr@0 1788
mas01cr@0 1789 }
mas01cr@0 1790
mas01cr@0 1791 // Matched filter matrix
mas01cr@0 1792 for(j=0; j<numVectors;j++){
mas01mc@18 1793 DD[j]=new double[trackTable[track]];
mas01cr@0 1794 assert(DD[j]);
mas01cr@0 1795 }
mas01cr@0 1796
mas01cr@179 1797 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@179 1798 if(data_buffer) {
mas01cr@179 1799 free(data_buffer);
mas01cr@179 1800 }
mas01cr@179 1801 {
mas01cr@179 1802 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@179 1803 void *tmp = malloc(data_buffer_size);
mas01cr@179 1804 if (tmp == NULL) {
mas01cr@179 1805 error("error allocating data buffer");
mas01cr@179 1806 }
mas01cr@179 1807 data_buffer = (double *) tmp;
mas01cr@179 1808 }
mas01cr@179 1809 }
mas01cr@179 1810
mas01cr@179 1811 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@179 1812
mas01mc@20 1813 // Dot product
mas01cr@0 1814 for(j=0; j<numVectors; j++)
mas01mc@18 1815 for(k=0; k<trackTable[track]; k++){
mas01cr@0 1816 qp=query+j*dbH->dim;
mas01cr@179 1817 sp=data_buffer+k*dbH->dim;
mas01cr@0 1818 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@0 1819 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@0 1820 *dp=0.0; // initialize correlation cell
mas01cr@0 1821 l=dbH->dim; // size of vectors
mas01cr@0 1822 while(l--)
mas01cr@0 1823 *dp+=*qp++**sp++;
mas01cr@0 1824 }
mas01cr@0 1825
mas01cr@0 1826 // Matched Filter
mas01cr@0 1827 // HOP SIZE == 1
mas01cr@0 1828 double* spd;
mas01cr@0 1829 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@0 1830 for(w=0; w<wL; w++)
mas01cr@0 1831 for(j=0; j<numVectors-w; j++){
mas01cr@0 1832 sp=DD[j];
mas01cr@0 1833 spd=D[j+w]+w;
mas01mc@18 1834 k=trackTable[track]-w;
mas01cr@0 1835 while(k--)
mas01cr@0 1836 *sp+++=*spd++;
mas01cr@0 1837 }
mas01cr@0 1838 }
mas01mc@20 1839
mas01cr@0 1840 else{ // HOP_SIZE != 1
mas01cr@0 1841 for(w=0; w<wL; w++)
mas01cr@0 1842 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@0 1843 sp=DD[j];
mas01cr@0 1844 spd=D[j+w]+w;
mas01mc@18 1845 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@0 1846 *sp+=*spd;
mas01cr@0 1847 sp+=HOP_SIZE;
mas01cr@0 1848 spd+=HOP_SIZE;
mas01cr@0 1849 }
mas01cr@0 1850 }
mas01cr@0 1851 }
mas01cr@0 1852
mas01cr@60 1853 if(verbosity>3 && usingTimes) {
mas01mc@18 1854 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01cr@0 1855 cerr.flush();
mas01cr@0 1856 }
mas01cr@0 1857
mas01cr@0 1858 if(!usingTimes ||
mas01cr@0 1859 (usingTimes
mas01mc@18 1860 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1861
mas01cr@60 1862 if(verbosity>3 && usingTimes) {
mas01cr@0 1863 cerr << "within duration tolerance." << endl;
mas01cr@0 1864 cerr.flush();
mas01cr@0 1865 }
mas01cr@0 1866
mas01cr@0 1867 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@53 1868 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@53 1869 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@125 1870 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@60 1871 if(verbosity>10) {
mas01cr@125 1872 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@60 1873 }
mas01mc@20 1874 // Gather chi^2 statistics
mas01mc@20 1875 if(thisDist<minSample)
mas01mc@20 1876 minSample=thisDist;
mas01mc@20 1877 else if(thisDist>maxSample)
mas01mc@20 1878 maxSample=thisDist;
mas01mc@20 1879 if(thisDist>1e-9){
mas01mc@20 1880 sampleCount++;
mas01mc@20 1881 sampleSum+=thisDist;
mas01mc@20 1882 logSampleSum+=log(thisDist);
mas01mc@20 1883 }
mas01mc@20 1884
mas01cr@125 1885 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@0 1886 // Power test
mas01cr@0 1887 if(!USE_THRESH ||
mas01cr@0 1888 // Threshold on mean L2 of Q and S sequences
mas01cr@125 1889 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
mas01cr@0 1890 // Are both query and target windows above mean energy?
mas01cr@125 1891 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH )))
mas01mc@20 1892 thisDist=thisDist; // Computed above
mas01cr@0 1893 else
mas01mc@20 1894 thisDist=1000000.0;
mas01mc@20 1895
mas01mc@20 1896 // k-NN match algorithm
mas01cr@58 1897 m=pointNN;
mas01mc@20 1898 while(m--){
mas01mc@20 1899 if(thisDist<=distances[m])
mas01mc@20 1900 if(m==0 || thisDist>=distances[m-1]){
mas01cr@0 1901 // Shuffle distances up the list
mas01cr@0 1902 for(l=pointNN-1; l>m; l--){
mas01cr@0 1903 distances[l]=distances[l-1];
mas01cr@0 1904 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1905 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1906 }
mas01cr@0 1907 distances[m]=thisDist;
mas01cr@0 1908 if(usingQueryPoint)
mas01cr@0 1909 qIndexes[m]=queryPoint;
mas01cr@0 1910 else
mas01cr@0 1911 qIndexes[m]=j;
mas01cr@0 1912 sIndexes[m]=k;
mas01cr@0 1913 break;
mas01mc@20 1914 }
mas01cr@0 1915 }
mas01cr@0 1916 }
mas01cr@0 1917 // Calculate the mean of the N-Best matches
mas01cr@0 1918 thisDist=0.0;
mas01cr@53 1919 for(m=0; m<pointNN; m++) {
mas01cr@53 1920 if (distances[m] == 1000000.0) break;
mas01mc@20 1921 thisDist+=distances[m];
mas01cr@53 1922 }
mas01cr@53 1923 thisDist/=m;
mas01cr@0 1924
mas01mc@12 1925 // Let's see the distances then...
mas01cr@60 1926 if(verbosity>3) {
mas01mc@18 1927 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@60 1928 }
mas01mc@12 1929
mas01mc@20 1930
mas01mc@18 1931 // All the track stuff goes here
mas01cr@58 1932 n=trackNN;
mas01cr@0 1933 while(n--){
mas01mc@20 1934 if(thisDist<=trackDistances[n]){
mas01mc@20 1935 if((n==0 || thisDist>=trackDistances[n-1])){
mas01cr@0 1936 // Copy all values above up the queue
mas01mc@18 1937 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 1938 trackDistances[l]=trackDistances[l-1];
mas01mc@18 1939 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 1940 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 1941 trackIDs[l]=trackIDs[l-1];
mas01cr@0 1942 }
mas01mc@18 1943 trackDistances[n]=thisDist;
mas01mc@18 1944 trackQIndexes[n]=qIndexes[0];
mas01mc@18 1945 trackSIndexes[n]=sIndexes[0];
mas01mc@18 1946 successfulTracks++;
mas01mc@18 1947 trackIDs[n]=track;
mas01cr@0 1948 break;
mas01cr@0 1949 }
mas01cr@0 1950 }
mas01cr@0 1951 else
mas01cr@0 1952 break;
mas01cr@0 1953 }
mas01cr@0 1954 } // Duration match
mas01mc@20 1955
mas01mc@18 1956 // Clean up current track
mas01cr@0 1957 if(D!=NULL){
mas01cr@0 1958 for(j=0; j<numVectors; j++)
mas01cr@0 1959 delete[] D[j];
mas01cr@0 1960 }
mas01cr@0 1961
mas01cr@0 1962 if(DD!=NULL){
mas01cr@0 1963 for(j=0; j<numVectors; j++)
mas01cr@0 1964 delete[] DD[j];
mas01cr@0 1965 }
mas01cr@0 1966 }
mas01mc@20 1967 // per-track reset array values
mas01mc@20 1968 for(unsigned k=0; k<pointNN; k++){
mas01mc@20 1969 distances[k]=1.0e6;
mas01mc@20 1970 qIndexes[k]=~0;
mas01mc@20 1971 sIndexes[k]=~0;
mas01mc@20 1972 }
mas01cr@0 1973 }
mas01cr@0 1974
mas01cr@179 1975 free(data_buffer);
mas01cr@179 1976
mas01cr@0 1977 gettimeofday(&tv2,NULL);
mas01cr@60 1978 if(verbosity>1) {
mas01mc@18 1979 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@0 1980 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01mc@20 1981 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01mc@20 1982 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01mc@20 1983 }
mas01cr@133 1984 if(adbQueryResponse==0){
mas01cr@60 1985 if(verbosity>1) {
mas01cr@0 1986 cerr<<endl;
mas01cr@60 1987 }
mas01cr@0 1988 // Output answer
mas01cr@0 1989 // Loop over nearest neighbours
mas01mc@18 1990 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01mc@20 1991 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
mas01mc@20 1992 << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 1993 }
mas01cr@0 1994 else{ // Process Web Services Query
mas01mc@18 1995 int listLen = min(trackNN, processedTracks);
mas01cr@133 1996 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 1997 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 1998 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 1999 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 2000 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 2001 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 2002 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 2003 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 2004 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 2005 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 2006 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 2007 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 2008 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 2009 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 2010 }
mas01cr@0 2011 }
mas01cr@0 2012
mas01cr@0 2013
mas01cr@0 2014 // Clean up
mas01mc@18 2015 if(trackOffsetTable)
mas01mc@20 2016 delete[] trackOffsetTable;
mas01cr@0 2017 if(queryCopy)
mas01mc@20 2018 delete[] queryCopy;
mas01cr@121 2019 if(qNorm)
mas01cr@121 2020 delete[] qNorm;
mas01cr@121 2021 if(sNorm)
mas01cr@121 2022 delete[] sNorm;
mas01cr@121 2023 if(sMeanL2)
mas01cr@121 2024 delete[] sMeanL2;
mas01cr@0 2025 if(D)
mas01cr@0 2026 delete[] D;
mas01cr@0 2027 if(DD)
mas01cr@0 2028 delete[] DD;
mas01cr@0 2029 if(timesdata)
mas01mc@20 2030 delete[] timesdata;
mas01cr@0 2031 if(meanDBdur)
mas01mc@20 2032 delete[] meanDBdur;
mas01cr@0 2033
mas01cr@0 2034
mas01cr@0 2035 }
mas01cr@0 2036
mas01mc@20 2037 // Radius search between query and target tracks
mas01mc@20 2038 // efficient implementation based on matched filter
mas01mc@20 2039 // assumes normed shingles
mas01mc@20 2040 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
mas01cr@133 2041 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01mc@17 2042
mas01cr@177 2043 initTables(dbName, inFile);
mas01mc@17 2044
mas01mc@17 2045 // For each input vector, find the closest pointNN matching output vectors and report
mas01mc@17 2046 // we use stdout in this stub version
mas01mc@17 2047 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@17 2048 double* query = (double*)(indata+sizeof(int));
mas01mc@17 2049 double* queryCopy = 0;
mas01mc@17 2050
mas01mc@17 2051 double qMeanL2;
mas01mc@17 2052 double* sMeanL2;
mas01mc@17 2053
mas01mc@17 2054 unsigned USE_THRESH=0;
mas01mc@17 2055 double SILENCE_THRESH=0;
mas01mc@17 2056 double DIFF_THRESH=0;
mas01mc@17 2057
mas01mc@17 2058 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01mc@17 2059 error("Database must be L2 normed for sequence query","use -l2norm");
mas01mc@17 2060
mas01cr@60 2061 if(verbosity>1) {
mas01mc@17 2062 cerr << "performing norms ... "; cerr.flush();
mas01cr@60 2063 }
mas01mc@17 2064 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01mc@18 2065
mas01mc@17 2066 // Make a copy of the query
mas01mc@17 2067 queryCopy = new double[numVectors*dbH->dim];
mas01mc@17 2068 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01mc@17 2069 qNorm = new double[numVectors];
mas01mc@17 2070 sNorm = new double[dbVectors];
mas01mc@17 2071 sMeanL2=new double[dbH->numFiles];
mas01mc@17 2072 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
mas01mc@17 2073 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01mc@17 2074 query = queryCopy;
mas01mc@18 2075
mas01mc@17 2076 // Make norm measurements relative to sequenceLength
mas01mc@17 2077 unsigned w = sequenceLength-1;
mas01mc@17 2078 unsigned i,j;
mas01mc@17 2079 double* ps;
mas01mc@17 2080 double tmp1,tmp2;
mas01mc@18 2081
mas01mc@17 2082 // Copy the L2 norm values to core to avoid disk random access later on
mas01mc@17 2083 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01mc@17 2084 double* snPtr = sNorm;
mas01cr@125 2085 double* qnPtr = qNorm;
mas01mc@17 2086 for(i=0; i<dbH->numFiles; i++){
mas01mc@18 2087 if(trackTable[i]>=sequenceLength){
mas01mc@17 2088 tmp1=*snPtr;
mas01mc@17 2089 j=1;
mas01mc@17 2090 w=sequenceLength-1;
mas01mc@17 2091 while(w--)
mas01mc@17 2092 *snPtr+=snPtr[j++];
mas01mc@17 2093 ps = snPtr+1;
mas01mc@18 2094 w=trackTable[i]-sequenceLength; // +1 - 1
mas01mc@17 2095 while(w--){
mas01mc@17 2096 tmp2=*ps;
mas01mc@17 2097 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01mc@17 2098 tmp1=tmp2;
mas01mc@17 2099 ps++;
mas01mc@17 2100 }
mas01mc@17 2101 ps = snPtr;
mas01mc@18 2102 w=trackTable[i]-sequenceLength+1;
mas01mc@17 2103 while(w--){
mas01mc@17 2104 *ps=sqrt(*ps);
mas01mc@17 2105 ps++;
mas01mc@17 2106 }
mas01mc@17 2107 }
mas01mc@18 2108 snPtr+=trackTable[i];
mas01mc@17 2109 }
mas01mc@17 2110
mas01mc@17 2111 double* pn = sMeanL2;
mas01mc@17 2112 w=dbH->numFiles;
mas01mc@17 2113 while(w--)
mas01mc@17 2114 *pn++=0.0;
mas01mc@17 2115 ps=sNorm;
mas01mc@18 2116 unsigned processedTracks=0;
mas01mc@17 2117 for(i=0; i<dbH->numFiles; i++){
mas01mc@18 2118 if(trackTable[i]>sequenceLength-1){
mas01cr@70 2119 w = trackTable[i]-sequenceLength+1;
mas01mc@17 2120 pn = sMeanL2+i;
mas01mc@17 2121 *pn=0;
mas01mc@17 2122 while(w--)
mas01mc@17 2123 if(*ps>0)
mas01mc@17 2124 *pn+=*ps++;
mas01cr@70 2125 *pn/=trackTable[i]-sequenceLength+1;
mas01mc@17 2126 SILENCE_THRESH+=*pn;
mas01mc@18 2127 processedTracks++;
mas01mc@17 2128 }
mas01mc@18 2129 ps = sNorm + trackTable[i];
mas01mc@17 2130 }
mas01cr@60 2131 if(verbosity>1) {
mas01mc@18 2132 cerr << "processedTracks: " << processedTracks << endl;
mas01cr@60 2133 }
mas01mc@17 2134
mas01mc@18 2135 SILENCE_THRESH/=processedTracks;
mas01mc@17 2136 USE_THRESH=1; // Turn thresholding on
mas01mc@18 2137 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
mas01mc@17 2138 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
mas01cr@60 2139 if(verbosity>4) {
mas01mc@17 2140 cerr << "silence thresh: " << SILENCE_THRESH;
mas01cr@60 2141 }
mas01mc@17 2142 w=sequenceLength-1;
mas01mc@17 2143 i=1;
mas01cr@125 2144 tmp1=*qnPtr;
mas01mc@17 2145 while(w--)
mas01cr@125 2146 *qnPtr+=qnPtr[i++];
mas01cr@125 2147 ps = qnPtr+1;
mas01mc@17 2148 w=numVectors-sequenceLength; // +1 -1
mas01mc@17 2149 while(w--){
mas01mc@17 2150 tmp2=*ps;
mas01mc@17 2151 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
mas01mc@17 2152 tmp1=tmp2;
mas01mc@17 2153 ps++;
mas01mc@17 2154 }
mas01cr@125 2155 ps = qnPtr;
mas01mc@17 2156 qMeanL2 = 0;
mas01mc@17 2157 w=numVectors-sequenceLength+1;
mas01mc@17 2158 while(w--){
mas01mc@17 2159 *ps=sqrt(*ps);
mas01mc@17 2160 qMeanL2+=*ps++;
mas01mc@17 2161 }
mas01mc@17 2162 qMeanL2 /= numVectors-sequenceLength+1;
mas01mc@17 2163
mas01cr@60 2164 if(verbosity>1) {
mas01mc@17 2165 cerr << "done." << endl;
mas01cr@60 2166 }
mas01mc@17 2167
mas01cr@60 2168 if(verbosity>1) {
mas01mc@18 2169 cerr << "matching tracks..." << endl;
mas01cr@60 2170 }
mas01mc@17 2171
mas01mc@17 2172 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 2173 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@17 2174
mas01mc@17 2175 // Make temporary dynamic memory for results
mas01mc@18 2176 double trackDistances[trackNN];
mas01mc@18 2177 unsigned trackIDs[trackNN];
mas01mc@18 2178 unsigned trackQIndexes[trackNN];
mas01mc@18 2179 unsigned trackSIndexes[trackNN];
mas01mc@17 2180
mas01mc@17 2181 double distances[pointNN];
mas01mc@17 2182 unsigned qIndexes[pointNN];
mas01mc@17 2183 unsigned sIndexes[pointNN];
mas01mc@17 2184
mas01mc@17 2185
mas01cr@59 2186 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01mc@17 2187 double thisDist;
mas01mc@17 2188
mas01mc@17 2189 for(k=0; k<pointNN; k++){
mas01mc@17 2190 distances[k]=0.0;
mas01mc@17 2191 qIndexes[k]=~0;
mas01mc@17 2192 sIndexes[k]=~0;
mas01mc@17 2193 }
mas01mc@17 2194
mas01mc@18 2195 for(k=0; k<trackNN; k++){
mas01mc@18 2196 trackDistances[k]=0.0;
mas01mc@18 2197 trackQIndexes[k]=~0;
mas01mc@18 2198 trackSIndexes[k]=~0;
mas01mc@18 2199 trackIDs[k]=~0;
mas01mc@17 2200 }
mas01mc@17 2201
mas01mc@17 2202 // Timestamp and durations processing
mas01mc@17 2203 double meanQdur = 0;
mas01mc@17 2204 double* timesdata = 0;
mas01mc@17 2205 double* meanDBdur = 0;
mas01mc@17 2206
mas01mc@17 2207 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01mc@17 2208 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01mc@17 2209 usingTimes=0;
mas01mc@17 2210 }
mas01mc@17 2211
mas01mc@17 2212 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@17 2213 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01mc@17 2214
mas01mc@17 2215 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01mc@17 2216 timesdata = new double[numVectors];
mas01mc@17 2217 assert(timesdata);
mas01mc@17 2218 insertTimeStamps(numVectors, timesFile, timesdata);
mas01mc@17 2219 // Calculate durations of points
mas01mc@17 2220 for(k=0; k<numVectors-1; k++){
mas01mc@17 2221 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01mc@17 2222 meanQdur+=timesdata[k];
mas01mc@17 2223 }
mas01mc@17 2224 meanQdur/=k;
mas01cr@60 2225 if(verbosity>1) {
mas01mc@17 2226 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@60 2227 }
mas01mc@17 2228 meanDBdur = new double[dbH->numFiles];
mas01mc@17 2229 assert(meanDBdur);
mas01mc@17 2230 for(k=0; k<dbH->numFiles; k++){
mas01mc@17 2231 meanDBdur[k]=0.0;
mas01mc@18 2232 for(j=0; j<trackTable[k]-1 ; j++)
mas01mc@17 2233 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01mc@17 2234 meanDBdur[k]/=j;
mas01mc@17 2235 }
mas01mc@17 2236 }
mas01mc@17 2237
mas01mc@17 2238 if(usingQueryPoint)
mas01mc@17 2239 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01mc@17 2240 error("queryPoint > numVectors-wL+1 in query");
mas01mc@17 2241 else{
mas01cr@60 2242 if(verbosity>1) {
mas01mc@17 2243 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 2244 }
mas01mc@17 2245 query=query+queryPoint*dbH->dim;
mas01cr@125 2246 qnPtr=qnPtr+queryPoint;
mas01mc@17 2247 numVectors=wL;
mas01mc@17 2248 }
mas01mc@17 2249
mas01mc@17 2250 double ** D = 0; // Differences query and target
mas01mc@17 2251 double ** DD = 0; // Matched filter distance
mas01mc@17 2252
mas01mc@17 2253 D = new double*[numVectors];
mas01mc@17 2254 assert(D);
mas01mc@17 2255 DD = new double*[numVectors];
mas01mc@17 2256 assert(DD);
mas01mc@17 2257
mas01mc@17 2258 gettimeofday(&tv1, NULL);
mas01mc@18 2259 processedTracks=0;
mas01mc@18 2260 unsigned successfulTracks=0;
mas01mc@17 2261
mas01mc@17 2262 double* qp;
mas01mc@17 2263 double* sp;
mas01mc@17 2264 double* dp;
mas01mc@17 2265
mas01mc@18 2266 // build track offset table
mas01mc@18 2267 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 2268 unsigned cumTrack=0;
mas01mc@18 2269 unsigned trackIndexOffset;
mas01mc@17 2270 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 2271 trackOffsetTable[k]=cumTrack;
mas01mc@18 2272 cumTrack+=trackTable[k]*dbH->dim;
mas01mc@17 2273 }
mas01mc@17 2274
mas01mc@17 2275 char nextKey [MAXSTR];
mas01mc@17 2276
mas01mc@17 2277 // chi^2 statistics
mas01mc@17 2278 double sampleCount = 0;
mas01mc@17 2279 double sampleSum = 0;
mas01mc@17 2280 double logSampleSum = 0;
mas01mc@17 2281 double minSample = 1e9;
mas01mc@17 2282 double maxSample = 0;
mas01mc@17 2283
mas01mc@17 2284 // Track loop
mas01cr@179 2285 size_t data_buffer_size = 0;
mas01cr@179 2286 double *data_buffer = 0;
mas01cr@179 2287 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@179 2288
mas01mc@18 2289 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01mc@17 2290
mas01mc@18 2291 // get trackID from file if using a control file
mas01mc@18 2292 if(trackFile){
mas01mc@18 2293 if(!trackFile->eof()){
mas01mc@18 2294 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 2295 track=getKeyPos(nextKey);
mas01mc@17 2296 }
mas01mc@17 2297 else
mas01mc@17 2298 break;
mas01mc@17 2299 }
mas01mc@17 2300
mas01mc@18 2301 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 2302 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01mc@17 2303
mas01cr@70 2304 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01mc@17 2305
mas01cr@60 2306 if(verbosity>7) {
mas01mc@18 2307 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 2308 }
mas01cr@60 2309
mas01mc@17 2310 // Sum products matrix
mas01mc@17 2311 for(j=0; j<numVectors;j++){
mas01mc@18 2312 D[j]=new double[trackTable[track]];
mas01mc@17 2313 assert(D[j]);
mas01mc@17 2314
mas01mc@17 2315 }
mas01mc@17 2316
mas01mc@17 2317 // Matched filter matrix
mas01mc@17 2318 for(j=0; j<numVectors;j++){
mas01mc@18 2319 DD[j]=new double[trackTable[track]];
mas01mc@17 2320 assert(DD[j]);
mas01mc@17 2321 }
mas01mc@17 2322
mas01cr@179 2323 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@179 2324 if(data_buffer) {
mas01cr@179 2325 free(data_buffer);
mas01cr@179 2326 }
mas01cr@179 2327 {
mas01cr@179 2328 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@179 2329 void *tmp = malloc(data_buffer_size);
mas01cr@179 2330 if (tmp == NULL) {
mas01cr@179 2331 error("error allocating data buffer");
mas01cr@179 2332 }
mas01cr@179 2333 data_buffer = (double *) tmp;
mas01cr@179 2334 }
mas01cr@179 2335 }
mas01cr@179 2336
mas01cr@179 2337 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@179 2338
mas01mc@17 2339 // Dot product
mas01mc@17 2340 for(j=0; j<numVectors; j++)
mas01mc@18 2341 for(k=0; k<trackTable[track]; k++){
mas01mc@17 2342 qp=query+j*dbH->dim;
mas01cr@179 2343 sp=data_buffer+k*dbH->dim;
mas01mc@17 2344 DD[j][k]=0.0; // Initialize matched filter array
mas01mc@17 2345 dp=&D[j][k]; // point to correlation cell j,k
mas01mc@17 2346 *dp=0.0; // initialize correlation cell
mas01mc@17 2347 l=dbH->dim; // size of vectors
mas01mc@17 2348 while(l--)
mas01mc@17 2349 *dp+=*qp++**sp++;
mas01mc@17 2350 }
mas01cr@179 2351
mas01mc@17 2352 // Matched Filter
mas01mc@17 2353 // HOP SIZE == 1
mas01mc@17 2354 double* spd;
mas01mc@17 2355 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01mc@17 2356 for(w=0; w<wL; w++)
mas01mc@17 2357 for(j=0; j<numVectors-w; j++){
mas01mc@17 2358 sp=DD[j];
mas01mc@17 2359 spd=D[j+w]+w;
mas01mc@18 2360 k=trackTable[track]-w;
mas01mc@17 2361 while(k--)
mas01mc@17 2362 *sp+++=*spd++;
mas01mc@17 2363 }
mas01mc@17 2364 }
mas01mc@17 2365
mas01mc@17 2366 else{ // HOP_SIZE != 1
mas01mc@17 2367 for(w=0; w<wL; w++)
mas01mc@17 2368 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01mc@17 2369 sp=DD[j];
mas01mc@17 2370 spd=D[j+w]+w;
mas01mc@18 2371 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01mc@17 2372 *sp+=*spd;
mas01mc@17 2373 sp+=HOP_SIZE;
mas01mc@17 2374 spd+=HOP_SIZE;
mas01mc@17 2375 }
mas01mc@17 2376 }
mas01mc@17 2377 }
mas01mc@17 2378
mas01cr@60 2379 if(verbosity>3 && usingTimes) {
mas01mc@18 2380 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01mc@17 2381 cerr.flush();
mas01mc@17 2382 }
mas01mc@17 2383
mas01mc@17 2384 if(!usingTimes ||
mas01mc@17 2385 (usingTimes
mas01mc@18 2386 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01mc@17 2387
mas01cr@60 2388 if(verbosity>3 && usingTimes) {
mas01mc@17 2389 cerr << "within duration tolerance." << endl;
mas01mc@17 2390 cerr.flush();
mas01mc@17 2391 }
mas01mc@17 2392
mas01mc@17 2393 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@70 2394 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@70 2395 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@125 2396 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@60 2397 if(verbosity>10) {
mas01cr@125 2398 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@60 2399 }
mas01mc@17 2400 // Gather chi^2 statistics
mas01mc@17 2401 if(thisDist<minSample)
mas01mc@17 2402 minSample=thisDist;
mas01mc@17 2403 else if(thisDist>maxSample)
mas01mc@17 2404 maxSample=thisDist;
mas01mc@17 2405 if(thisDist>1e-9){
mas01mc@17 2406 sampleCount++;
mas01mc@17 2407 sampleSum+=thisDist;
mas01mc@17 2408 logSampleSum+=log(thisDist);
mas01mc@17 2409 }
mas01mc@17 2410
mas01cr@125 2411 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01mc@17 2412 // Power test
mas01mc@17 2413 if(!USE_THRESH ||
mas01mc@17 2414 // Threshold on mean L2 of Q and S sequences
mas01cr@125 2415 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
mas01mc@17 2416 // Are both query and target windows above mean energy?
mas01cr@125 2417 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH )))
mas01mc@17 2418 thisDist=thisDist; // Computed above
mas01mc@17 2419 else
mas01mc@17 2420 thisDist=1000000.0;
mas01mc@17 2421 if(thisDist>=0 && thisDist<=radius){
mas01mc@17 2422 distances[0]++; // increment count
mas01mc@18 2423 break; // only need one track point per query point
mas01mc@17 2424 }
mas01mc@17 2425 }
mas01mc@17 2426 // How many points were below threshold ?
mas01mc@17 2427 thisDist=distances[0];
mas01mc@17 2428
mas01mc@17 2429 // Let's see the distances then...
mas01cr@60 2430 if(verbosity>3) {
mas01mc@18 2431 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@60 2432 }
mas01mc@17 2433
mas01mc@18 2434 // All the track stuff goes here
mas01mc@18 2435 n=trackNN;
mas01mc@17 2436 while(n--){
mas01mc@18 2437 if(thisDist>trackDistances[n]){
mas01mc@18 2438 if((n==0 || thisDist<=trackDistances[n-1])){
mas01mc@17 2439 // Copy all values above up the queue
mas01mc@18 2440 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 2441 trackDistances[l]=trackDistances[l-1];
mas01mc@18 2442 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 2443 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 2444 trackIDs[l]=trackIDs[l-1];
mas01mc@17 2445 }
mas01mc@18 2446 trackDistances[n]=thisDist;
mas01mc@18 2447 trackQIndexes[n]=qIndexes[0];
mas01mc@18 2448 trackSIndexes[n]=sIndexes[0];
mas01mc@18 2449 successfulTracks++;
mas01mc@18 2450 trackIDs[n]=track;
mas01mc@17 2451 break;
mas01mc@17 2452 }
mas01mc@17 2453 }
mas01mc@17 2454 else
mas01mc@17 2455 break;
mas01mc@17 2456 }
mas01mc@17 2457 } // Duration match
mas01mc@17 2458
mas01mc@18 2459 // Clean up current track
mas01mc@17 2460 if(D!=NULL){
mas01mc@17 2461 for(j=0; j<numVectors; j++)
mas01mc@17 2462 delete[] D[j];
mas01mc@17 2463 }
mas01mc@17 2464
mas01mc@17 2465 if(DD!=NULL){
mas01mc@17 2466 for(j=0; j<numVectors; j++)
mas01mc@17 2467 delete[] DD[j];
mas01mc@17 2468 }
mas01mc@17 2469 }
mas01mc@18 2470 // per-track reset array values
mas01mc@17 2471 for(unsigned k=0; k<pointNN; k++){
mas01mc@17 2472 distances[k]=0.0;
mas01mc@17 2473 qIndexes[k]=~0;
mas01mc@17 2474 sIndexes[k]=~0;
mas01mc@17 2475 }
mas01mc@17 2476 }
mas01mc@17 2477
mas01cr@179 2478 free(data_buffer);
mas01cr@179 2479
mas01mc@17 2480 gettimeofday(&tv2,NULL);
mas01cr@60 2481 if(verbosity>1) {
mas01mc@18 2482 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01mc@17 2483 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01mc@17 2484 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01mc@17 2485 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01mc@17 2486 }
mas01mc@17 2487
mas01cr@133 2488 if(adbQueryResponse==0){
mas01cr@60 2489 if(verbosity>1) {
mas01mc@17 2490 cerr<<endl;
mas01cr@60 2491 }
mas01mc@17 2492 // Output answer
mas01mc@17 2493 // Loop over nearest neighbours
mas01mc@18 2494 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01mc@18 2495 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
mas01mc@17 2496 }
mas01mc@17 2497 else{ // Process Web Services Query
mas01mc@18 2498 int listLen = min(trackNN, processedTracks);
mas01cr@133 2499 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 2500 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 2501 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 2502 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 2503 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 2504 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 2505 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 2506 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 2507 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 2508 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 2509 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 2510 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 2511 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 2512 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01mc@17 2513 }
mas01mc@17 2514 }
mas01mc@17 2515
mas01mc@17 2516 // Clean up
mas01mc@18 2517 if(trackOffsetTable)
mas01mc@18 2518 delete[] trackOffsetTable;
mas01mc@17 2519 if(queryCopy)
mas01mc@17 2520 delete[] queryCopy;
mas01cr@121 2521 if(qNorm)
mas01cr@121 2522 delete[] qNorm;
mas01cr@121 2523 if(sNorm)
mas01cr@121 2524 delete[] sNorm;
mas01cr@121 2525 if(sMeanL2)
mas01cr@121 2526 delete[] sMeanL2;
mas01mc@17 2527 if(D)
mas01mc@17 2528 delete[] D;
mas01mc@17 2529 if(DD)
mas01mc@17 2530 delete[] DD;
mas01mc@17 2531 if(timesdata)
mas01mc@17 2532 delete[] timesdata;
mas01mc@17 2533 if(meanDBdur)
mas01mc@17 2534 delete[] meanDBdur;
mas01mc@17 2535
mas01mc@17 2536
mas01mc@17 2537 }
mas01mc@17 2538
mas01cr@0 2539 // Unit norm block of features
mas01cr@0 2540 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@0 2541 unsigned d;
mas01cr@59 2542 double L2, *p;
mas01cr@60 2543 if(verbosity>2) {
mas01cr@0 2544 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@60 2545 }
mas01cr@0 2546 while(n--){
mas01cr@0 2547 p=X;
mas01cr@0 2548 L2=0.0;
mas01cr@0 2549 d=dim;
mas01cr@0 2550 while(d--){
mas01cr@0 2551 L2+=*p**p;
mas01cr@0 2552 p++;
mas01cr@0 2553 }
mas01mc@17 2554 /* L2=sqrt(L2);*/
mas01cr@0 2555 if(qNorm)
mas01cr@0 2556 *qNorm++=L2;
mas01mc@17 2557 /*
mas01cr@0 2558 oneOverL2 = 1.0/L2;
mas01cr@0 2559 d=dim;
mas01cr@0 2560 while(d--){
mas01cr@0 2561 *X*=oneOverL2;
mas01cr@0 2562 X++;
mas01mc@17 2563 */
mas01mc@17 2564 X+=dim;
mas01cr@0 2565 }
mas01cr@60 2566 if(verbosity>2) {
mas01cr@0 2567 cerr << "done..." << endl;
mas01cr@60 2568 }
mas01cr@0 2569 }
mas01cr@0 2570
mas01cr@0 2571 // Unit norm block of features
mas01cr@0 2572 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 2573 unsigned d;
mas01cr@59 2574 double *p;
mas01cr@0 2575 unsigned nn = n;
mas01cr@0 2576
mas01cr@0 2577 assert(l2normTable);
mas01cr@0 2578
mas01cr@0 2579 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 2580 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 2581
mas01cr@60 2582 if(verbosity>2) {
mas01cr@0 2583 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@60 2584 }
mas01cr@0 2585
mas01cr@0 2586 double* l2buf = new double[n];
mas01cr@0 2587 double* l2ptr = l2buf;
mas01cr@0 2588 assert(l2buf);
mas01cr@0 2589 assert(X);
mas01cr@0 2590
mas01cr@0 2591 while(nn--){
mas01cr@0 2592 p=X;
mas01cr@0 2593 *l2ptr=0.0;
mas01cr@0 2594 d=dim;
mas01cr@0 2595 while(d--){
mas01cr@0 2596 *l2ptr+=*p**p;
mas01cr@0 2597 p++;
mas01cr@0 2598 }
mas01mc@17 2599 l2ptr++;
mas01mc@17 2600 /*
mas01mc@17 2601 oneOverL2 = 1.0/(*l2ptr++);
mas01mc@17 2602 d=dim;
mas01mc@17 2603 while(d--){
mas01cr@0 2604 *X*=oneOverL2;
mas01cr@0 2605 X++;
mas01mc@17 2606 }
mas01mc@17 2607 */
mas01mc@17 2608 X+=dim;
mas01cr@0 2609 }
mas01cr@0 2610 unsigned offset;
mas01cr@84 2611 if(append) {
mas01cr@84 2612 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 2613 // been inserted, and dbH->length has already been updated. We
mas01cr@84 2614 // need to subtract off again the number of vectors that we've
mas01cr@84 2615 // inserted this time...
mas01cr@84 2616 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 2617 } else {
mas01cr@0 2618 offset=0;
mas01cr@84 2619 }
mas01cr@0 2620 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 2621 if(l2buf)
mas01mc@17 2622 delete[] l2buf;
mas01cr@60 2623 if(verbosity>2) {
mas01cr@0 2624 cerr << "done..." << endl;
mas01cr@60 2625 }
mas01cr@0 2626 }
mas01cr@0 2627
mas01cr@0 2628
mas01cr@0 2629 // Start an audioDB server on the host
mas01cr@0 2630 void audioDB::startServer(){
mas01cr@0 2631 struct soap soap;
mas01cr@0 2632 int m, s; // master and slave sockets
mas01cr@0 2633 soap_init(&soap);
mas01cr@92 2634 // FIXME: largely this use of SO_REUSEADDR is to make writing (and
mas01cr@92 2635 // running) test cases more convenient, so that multiple test runs
mas01cr@92 2636 // in close succession don't fail because of a bin() error.
mas01cr@92 2637 // Investigate whether there are any potential drawbacks in this,
mas01cr@92 2638 // and also whether there's a better way to write the tests. --
mas01cr@92 2639 // CSR, 2007-10-03
mas01cr@92 2640 soap.bind_flags |= SO_REUSEADDR;
mas01cr@0 2641 m = soap_bind(&soap, NULL, port, 100);
mas01cr@0 2642 if (m < 0)
mas01cr@0 2643 soap_print_fault(&soap, stderr);
mas01cr@0 2644 else
mas01cr@0 2645 {
mas01cr@0 2646 fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
mas01cr@0 2647 for (int i = 1; ; i++)
mas01cr@0 2648 {
mas01cr@0 2649 s = soap_accept(&soap);
mas01cr@0 2650 if (s < 0)
mas01cr@0 2651 {
mas01cr@0 2652 soap_print_fault(&soap, stderr);
mas01cr@0 2653 break;
mas01cr@0 2654 }
mas01cr@75 2655 fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i,
mas01cr@0 2656 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
mas01cr@0 2657 if (soap_serve(&soap) != SOAP_OK) // process RPC request
mas01cr@0 2658 soap_print_fault(&soap, stderr); // print error
mas01cr@0 2659 fprintf(stderr, "request served\n");
mas01cr@0 2660 soap_destroy(&soap); // clean up class instances
mas01cr@0 2661 soap_end(&soap); // clean up everything and close socket
mas01cr@0 2662 }
mas01cr@0 2663 }
mas01cr@0 2664 soap_done(&soap); // close master socket and detach environment
mas01cr@0 2665 }
mas01cr@0 2666
mas01cr@0 2667
mas01cr@0 2668 // web services
mas01cr@0 2669
mas01cr@0 2670 // SERVER SIDE
mas01cr@133 2671 int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){
mas01cr@21 2672 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
mas01cr@21 2673 const unsigned argc = 4;
mas01cr@77 2674 try {
mas01cr@133 2675 audioDB(argc, argv, &adbStatusResponse);
mas01cr@77 2676 return SOAP_OK;
mas01cr@77 2677 } catch(char *err) {
mas01cr@77 2678 soap_receiver_fault(soap, err, "");
mas01cr@77 2679 return SOAP_FAULT;
mas01cr@77 2680 }
mas01cr@0 2681 }
mas01cr@0 2682
mas01cr@0 2683 // Literal translation of command line to web service
mas01cr@0 2684
mas01cr@133 2685 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){
mas01cr@0 2686 char queryType[256];
mas01cr@0 2687 for(int k=0; k<256; k++)
mas01cr@0 2688 queryType[k]='\0';
mas01cr@105 2689 if(qType == O2_POINT_QUERY)
mas01cr@0 2690 strncpy(queryType, "point", strlen("point"));
mas01cr@105 2691 else if (qType == O2_SEQUENCE_QUERY)
mas01cr@0 2692 strncpy(queryType, "sequence", strlen("sequence"));
mas01cr@105 2693 else if(qType == O2_TRACK_QUERY)
mas01mc@18 2694 strncpy(queryType,"track", strlen("track"));
mas01cr@0 2695 else
mas01cr@0 2696 strncpy(queryType, "", strlen(""));
mas01cr@0 2697
mas01cr@0 2698 if(pointNN==0)
mas01cr@0 2699 pointNN=10;
mas01mc@18 2700 if(trackNN==0)
mas01mc@18 2701 trackNN=10;
mas01cr@0 2702 if(seqLen==0)
mas01cr@0 2703 seqLen=16;
mas01cr@0 2704
mas01cr@0 2705 char qPosStr[256];
mas01cr@0 2706 sprintf(qPosStr, "%d", qPos);
mas01cr@0 2707 char pointNNStr[256];
mas01cr@0 2708 sprintf(pointNNStr,"%d",pointNN);
mas01mc@18 2709 char trackNNStr[256];
mas01mc@18 2710 sprintf(trackNNStr,"%d",trackNN);
mas01cr@0 2711 char seqLenStr[256];
mas01cr@0 2712 sprintf(seqLenStr,"%d",seqLen);
mas01cr@0 2713
mas01cr@0 2714 const char* argv[] ={
mas01cr@0 2715 "./audioDB",
mas01cr@0 2716 COM_QUERY,
mas01cr@0 2717 queryType, // Need to pass a parameter
mas01cr@0 2718 COM_DATABASE,
mas01cr@166 2719 ENSURE_STRING(dbName),
mas01cr@0 2720 COM_FEATURES,
mas01cr@166 2721 ENSURE_STRING(qKey),
mas01cr@0 2722 COM_KEYLIST,
mas01cr@166 2723 ENSURE_STRING(keyList),
mas01cr@0 2724 COM_TIMES,
mas01cr@166 2725 ENSURE_STRING(timesFileName),
mas01cr@0 2726 COM_QPOINT,
mas01cr@0 2727 qPosStr,
mas01cr@0 2728 COM_POINTNN,
mas01cr@0 2729 pointNNStr,
mas01mc@18 2730 COM_TRACKNN,
mas01mc@18 2731 trackNNStr, // Need to pass a parameter
mas01cr@0 2732 COM_SEQLEN,
mas01cr@0 2733 seqLenStr
mas01cr@0 2734 };
mas01cr@0 2735
mas01cr@0 2736 const unsigned argc = 19;
mas01cr@79 2737 try {
mas01cr@133 2738 audioDB(argc, (char* const*)argv, &adbQueryResponse);
mas01cr@79 2739 return SOAP_OK;
mas01cr@79 2740 } catch (char *err) {
mas01cr@79 2741 soap_receiver_fault(soap, err, "");
mas01cr@79 2742 return SOAP_FAULT;
mas01cr@79 2743 }
mas01cr@0 2744 }
mas01cr@0 2745
mas01cr@0 2746 int main(const unsigned argc, char* const argv[]){
mas01cr@0 2747 audioDB(argc, argv);
mas01cr@0 2748 }