annotate audioDB.cpp @ 171:bb934f91d85c powertable

Web services interface to the powertable functionality. * Implemented as a completely new SOAP method, called sequenceQuery * lightly tested with http://doc.gold.ac.uk/~mas01cr/poweradb.php (lacuna: need to pass relative-threshold and absolute-threshold always, even when they don't make sense.) Several uglinesses in the code, some of which will be resolved once all the various branches are merged into the trunk.
author mas01cr
date Wed, 14 Nov 2007 14:00:53 +0000
parents 9ec0f27e07b9
children
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01cr@105 3 #if defined(O2_DEBUG)
mas01cr@104 4 void sigterm_action(int signal, siginfo_t *info, void *context) {
mas01cr@104 5 exit(128+signal);
mas01cr@104 6 }
mas01cr@104 7
mas01cr@104 8 void sighup_action(int signal, siginfo_t *info, void *context) {
mas01cr@104 9 // FIXME: reread any configuration files
mas01cr@104 10 }
mas01cr@105 11 #endif
mas01cr@0 12
mas01cr@32 13 void audioDB::error(const char* a, const char* b, const char *sysFunc) {
mas01cr@77 14 if(isServer) {
mas01cr@81 15 /* FIXME: I think this is leaky -- we never delete err. actually
mas01cr@81 16 deleting it is tricky, though; it gets placed into some
mas01cr@81 17 soap-internal struct with uncertain extent... -- CSR,
mas01cr@81 18 2007-10-01 */
mas01cr@77 19 char *err = new char[256]; /* FIXME: overflows */
mas01cr@77 20 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
mas01cr@77 21 /* FIXME: actually we could usefully do with a properly structured
mas01cr@77 22 type, so that we can throw separate faultstring and details.
mas01cr@77 23 -- CSR, 2007-10-01 */
mas01cr@77 24 throw(err);
mas01cr@77 25 } else {
mas01cr@77 26 cerr << a << ": " << b << endl;
mas01cr@77 27 if (sysFunc) {
mas01cr@77 28 perror(sysFunc);
mas01cr@77 29 }
mas01cr@77 30 exit(1);
mas01cr@32 31 }
mas01cr@0 32 }
mas01cr@0 33
mas01cr@76 34 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 35 {
mas01cr@0 36 if(processArgs(argc, argv)<0){
mas01cr@0 37 printf("No command found.\n");
mas01cr@0 38 cmdline_parser_print_version ();
mas01cr@0 39 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 40 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 41 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 42 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 43 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 44 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 45 exit(1);
mas01cr@0 46 }
mas01cr@77 47
mas01cr@0 48 if(O2_ACTION(COM_SERVER))
mas01cr@0 49 startServer();
mas01cr@0 50
mas01cr@0 51 else if(O2_ACTION(COM_CREATE))
mas01cr@0 52 create(dbName);
mas01cr@0 53
mas01cr@0 54 else if(O2_ACTION(COM_INSERT))
mas01cr@0 55 insert(dbName, inFile);
mas01cr@0 56
mas01cr@0 57 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 58 batchinsert(dbName, inFile);
mas01cr@0 59
mas01cr@0 60 else if(O2_ACTION(COM_QUERY))
mas01cr@0 61 if(isClient)
mas01cr@0 62 ws_query(dbName, inFile, (char*)hostport);
mas01cr@0 63 else
mas01cr@76 64 query(dbName, inFile);
mas01cr@0 65
mas01cr@0 66 else if(O2_ACTION(COM_STATUS))
mas01cr@0 67 if(isClient)
mas01cr@0 68 ws_status(dbName,(char*)hostport);
mas01cr@0 69 else
mas01cr@0 70 status(dbName);
mas01cr@0 71
mas01cr@0 72 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 73 l2norm(dbName);
mas01cr@0 74
mas01cr@146 75 else if(O2_ACTION(COM_POWER))
mas01cr@146 76 power_flag(dbName);
mas01cr@146 77
mas01cr@0 78 else if(O2_ACTION(COM_DUMP))
mas01cr@0 79 dump(dbName);
mas01cr@0 80
mas01cr@0 81 else
mas01cr@0 82 error("Unrecognized command",command);
mas01cr@0 83 }
mas01cr@0 84
mas01cr@133 85 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 86 {
mas01cr@97 87 try {
mas01cr@97 88 processArgs(argc, argv);
mas01cr@97 89 isServer = 1; // FIXME: Hack
mas01cr@97 90 assert(O2_ACTION(COM_QUERY));
mas01cr@133 91 query(dbName, inFile, adbQueryResponse);
mas01cr@97 92 } catch(char *err) {
mas01cr@97 93 cleanup();
mas01cr@97 94 throw(err);
mas01cr@97 95 }
mas01cr@76 96 }
mas01cr@76 97
mas01cr@133 98 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 99 {
mas01cr@97 100 try {
mas01cr@97 101 processArgs(argc, argv);
mas01cr@97 102 isServer = 1; // FIXME: Hack
mas01cr@97 103 assert(O2_ACTION(COM_STATUS));
mas01cr@133 104 status(dbName, adbStatusResponse);
mas01cr@97 105 } catch(char *err) {
mas01cr@97 106 cleanup();
mas01cr@97 107 throw(err);
mas01cr@97 108 }
mas01cr@76 109 }
mas01cr@76 110
mas01cr@97 111 void audioDB::cleanup() {
mas01cr@122 112 cmdline_parser_free(&args_info);
mas01cr@0 113 if(indata)
mas01cr@0 114 munmap(indata,statbuf.st_size);
mas01cr@0 115 if(db)
mas01cr@128 116 munmap(db,dbH->dbSize);
mas01cr@0 117 if(dbfid>0)
mas01cr@0 118 close(dbfid);
mas01cr@0 119 if(infid>0)
mas01cr@0 120 close(infid);
mas01cr@0 121 if(dbH)
mas01cr@0 122 delete dbH;
mas01cr@0 123 }
mas01cr@0 124
mas01cr@97 125 audioDB::~audioDB(){
mas01cr@97 126 cleanup();
mas01cr@97 127 }
mas01cr@97 128
mas01cr@0 129 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 130
mas01cr@0 131 if(argc<2){
mas01cr@0 132 cmdline_parser_print_version ();
mas01cr@0 133 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 134 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 135 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 136 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 137 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 138 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 139 exit(0);
mas01cr@0 140 }
mas01cr@0 141
mas01cr@0 142 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@0 143 exit(1) ;
mas01cr@0 144
mas01cr@0 145 if(args_info.help_given){
mas01cr@0 146 cmdline_parser_print_help();
mas01cr@0 147 exit(0);
mas01cr@0 148 }
mas01cr@0 149
mas01cr@0 150 if(args_info.verbosity_given){
mas01cr@0 151 verbosity=args_info.verbosity_arg;
mas01cr@0 152 if(verbosity<0 || verbosity>10){
mas01cr@0 153 cerr << "Warning: verbosity out of range, setting to 1" << endl;
mas01cr@0 154 verbosity=1;
mas01cr@0 155 }
mas01cr@0 156 }
mas01cr@0 157
mas01cr@129 158 if(args_info.size_given) {
mas01cr@140 159 if (args_info.size_arg < 50 || args_info.size_arg > 4000) {
mas01cr@129 160 error("Size out of range", "");
mas01cr@129 161 }
mas01cr@129 162 size = args_info.size_arg * 1000000;
mas01cr@129 163 }
mas01cr@129 164
mas01mc@17 165 if(args_info.radius_given){
mas01mc@17 166 radius=args_info.radius_arg;
mas01mc@17 167 if(radius<=0 || radius>1000000000){
mas01cr@77 168 error("radius out of range");
mas01mc@17 169 }
mas01mc@17 170 else
mas01cr@60 171 if(verbosity>3) {
mas01mc@17 172 cerr << "Setting radius to " << radius << endl;
mas01cr@60 173 }
mas01mc@17 174 }
mas01mc@17 175
mas01cr@0 176 if(args_info.SERVER_given){
mas01cr@0 177 command=COM_SERVER;
mas01cr@0 178 port=args_info.SERVER_arg;
mas01cr@0 179 if(port<100 || port > 100000)
mas01cr@0 180 error("port out of range");
mas01cr@0 181 isServer=1;
mas01cr@105 182 #if defined(O2_DEBUG)
mas01cr@104 183 struct sigaction sa;
mas01cr@104 184 sa.sa_sigaction = sigterm_action;
mas01cr@104 185 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 186 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 187 sa.sa_sigaction = sighup_action;
mas01cr@104 188 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 189 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 190 #endif
mas01cr@0 191 return 0;
mas01cr@0 192 }
mas01cr@0 193
mas01cr@0 194 // No return on client command, find database command
mas01cr@105 195 if(args_info.client_given){
mas01cr@105 196 command=COM_CLIENT;
mas01cr@105 197 hostport=args_info.client_arg;
mas01cr@105 198 isClient=1;
mas01cr@105 199 }
mas01cr@0 200
mas01cr@105 201 if(args_info.NEW_given){
mas01cr@105 202 command=COM_CREATE;
mas01cr@105 203 dbName=args_info.database_arg;
mas01cr@105 204 return 0;
mas01cr@105 205 }
mas01cr@0 206
mas01cr@105 207 if(args_info.STATUS_given){
mas01cr@105 208 command=COM_STATUS;
mas01cr@105 209 dbName=args_info.database_arg;
mas01cr@105 210 return 0;
mas01cr@105 211 }
mas01cr@0 212
mas01cr@105 213 if(args_info.DUMP_given){
mas01cr@105 214 command=COM_DUMP;
mas01cr@105 215 dbName=args_info.database_arg;
mas01cr@131 216 output = args_info.output_arg;
mas01cr@105 217 return 0;
mas01cr@105 218 }
mas01cr@0 219
mas01cr@105 220 if(args_info.L2NORM_given){
mas01cr@105 221 command=COM_L2NORM;
mas01cr@105 222 dbName=args_info.database_arg;
mas01cr@105 223 return 0;
mas01cr@105 224 }
mas01cr@0 225
mas01cr@146 226 if(args_info.POWER_given){
mas01cr@146 227 command=COM_POWER;
mas01cr@146 228 dbName=args_info.database_arg;
mas01cr@146 229 return 0;
mas01cr@146 230 }
mas01cr@146 231
mas01cr@105 232 if(args_info.INSERT_given){
mas01cr@105 233 command=COM_INSERT;
mas01cr@105 234 dbName=args_info.database_arg;
mas01cr@105 235 inFile=args_info.features_arg;
mas01cr@105 236 if(args_info.key_given)
mas01cr@105 237 key=args_info.key_arg;
mas01cr@105 238 if(args_info.times_given){
mas01cr@105 239 timesFileName=args_info.times_arg;
mas01cr@105 240 if(strlen(timesFileName)>0){
mas01cr@105 241 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 242 error("Could not open times file for reading", timesFileName);
mas01cr@105 243 usingTimes=1;
mas01cr@105 244 }
mas01cr@105 245 }
mas01cr@144 246 if (args_info.power_given) {
mas01cr@144 247 powerFileName = args_info.power_arg;
mas01cr@144 248 if (strlen(powerFileName) > 0) {
mas01cr@144 249 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@144 250 error("Could not open power file for reading", powerFileName, "open");
mas01cr@144 251 }
mas01cr@144 252 usingPower = 1;
mas01cr@144 253 }
mas01cr@144 254 }
mas01cr@105 255 return 0;
mas01cr@105 256 }
mas01cr@105 257
mas01cr@105 258 if(args_info.BATCHINSERT_given){
mas01cr@105 259 command=COM_BATCHINSERT;
mas01cr@105 260 dbName=args_info.database_arg;
mas01cr@105 261 inFile=args_info.featureList_arg;
mas01cr@105 262 if(args_info.keyList_given)
mas01cr@105 263 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 264
mas01cr@105 265 /* TO DO: REPLACE WITH
mas01cr@0 266 if(args_info.keyList_given){
mas01mc@18 267 trackFileName=args_info.keyList_arg;
mas01mc@18 268 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01mc@18 269 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 270 }
mas01cr@0 271 AND UPDATE BATCHINSERT()
mas01cr@105 272 */
mas01cr@105 273
mas01cr@105 274 if(args_info.timesList_given){
mas01cr@105 275 timesFileName=args_info.timesList_arg;
mas01cr@105 276 if(strlen(timesFileName)>0){
mas01cr@105 277 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 278 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 279 usingTimes=1;
mas01cr@105 280 }
mas01cr@105 281 }
mas01cr@145 282 if(args_info.powerList_given){
mas01cr@145 283 powerFileName=args_info.powerList_arg;
mas01cr@145 284 if(strlen(powerFileName)>0){
mas01cr@145 285 if(!(powerFile = new ifstream(powerFileName,ios::in)))
mas01cr@145 286 error("Could not open powerList file for reading", powerFileName);
mas01cr@145 287 usingPower=1;
mas01cr@145 288 }
mas01cr@145 289 }
mas01cr@105 290 return 0;
mas01cr@105 291 }
mas01cr@105 292
mas01cr@105 293 // Query command and arguments
mas01cr@105 294 if(args_info.QUERY_given){
mas01cr@105 295 command=COM_QUERY;
mas01cr@105 296 dbName=args_info.database_arg;
mas01cr@105 297 inFile=args_info.features_arg;
mas01cr@105 298
mas01cr@105 299 if(args_info.keyList_given){
mas01cr@105 300 trackFileName=args_info.keyList_arg;
mas01cr@105 301 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
mas01cr@105 302 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 303 }
mas01cr@105 304
mas01cr@105 305 if(args_info.times_given){
mas01cr@105 306 timesFileName=args_info.times_arg;
mas01cr@105 307 if(strlen(timesFileName)>0){
mas01cr@105 308 if(!(timesFile = new ifstream(timesFileName,ios::in)))
mas01cr@105 309 error("Could not open times file for reading", timesFileName);
mas01cr@105 310 usingTimes=1;
mas01cr@105 311 }
mas01cr@105 312 }
mas01cr@147 313
mas01cr@147 314 if(args_info.power_given){
mas01cr@147 315 powerFileName=args_info.power_arg;
mas01cr@147 316 if(strlen(powerFileName)>0){
mas01cr@147 317 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@147 318 error("Could not open power file for reading", powerFileName, "open");
mas01cr@147 319 }
mas01cr@147 320 usingPower = 1;
mas01cr@147 321 }
mas01cr@147 322 }
mas01cr@105 323
mas01cr@105 324 // query type
mas01cr@105 325 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 326 queryType=O2_TRACK_QUERY;
mas01cr@105 327 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 328 queryType=O2_POINT_QUERY;
mas01cr@105 329 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 330 queryType=O2_SEQUENCE_QUERY;
mas01cr@105 331 else
mas01cr@105 332 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 333
mas01cr@105 334 if(!args_info.exhaustive_flag){
mas01cr@105 335 queryPoint = args_info.qpoint_arg;
mas01cr@105 336 usingQueryPoint=1;
mas01cr@105 337 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 338 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 339 }
mas01cr@105 340
mas01cr@105 341 pointNN = args_info.pointnn_arg;
mas01cr@105 342 if(pointNN < 1 || pointNN > 1000) {
mas01cr@105 343 error("pointNN out of range: 1 <= pointNN <= 1000");
mas01cr@105 344 }
mas01cr@105 345 trackNN = args_info.resultlength_arg;
mas01cr@105 346 if(trackNN < 1 || trackNN > 1000) {
mas01cr@105 347 error("resultlength out of range: 1 <= resultlength <= 1000");
mas01cr@105 348 }
mas01cr@105 349 sequenceLength = args_info.sequencelength_arg;
mas01cr@105 350 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@105 351 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@105 352 }
mas01cr@105 353 sequenceHop = args_info.sequencehop_arg;
mas01cr@105 354 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01cr@105 355 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01cr@105 356 }
mas01cr@147 357
mas01cr@147 358 if (args_info.absolute_threshold_given) {
mas01cr@147 359 if (args_info.absolute_threshold_arg >= 0) {
mas01cr@147 360 error("absolute threshold out of range: should be negative");
mas01cr@147 361 }
mas01cr@147 362 use_absolute_threshold = true;
mas01cr@147 363 absolute_threshold = args_info.absolute_threshold_arg;
mas01cr@147 364 }
mas01cr@147 365 if (args_info.relative_threshold_given) {
mas01cr@147 366 use_relative_threshold = true;
mas01cr@147 367 relative_threshold = args_info.relative_threshold_arg;
mas01cr@147 368 }
mas01cr@105 369 return 0;
mas01cr@105 370 }
mas01cr@105 371 return -1; // no command found
mas01cr@0 372 }
mas01cr@0 373
mas01cr@30 374 void audioDB::get_lock(int fd, bool exclusive) {
mas01cr@30 375 struct flock lock;
mas01cr@30 376 int status;
mas01cr@30 377
mas01cr@30 378 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01cr@30 379 lock.l_whence = SEEK_SET;
mas01cr@30 380 lock.l_start = 0;
mas01cr@30 381 lock.l_len = 0; /* "the whole file" */
mas01cr@30 382
mas01cr@30 383 retry:
mas01cr@30 384 do {
mas01cr@30 385 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@30 386 } while (status != 0 && errno == EINTR);
mas01cr@30 387
mas01cr@30 388 if (status) {
mas01cr@30 389 if (errno == EAGAIN) {
mas01cr@30 390 sleep(1);
mas01cr@30 391 goto retry;
mas01cr@30 392 } else {
mas01cr@32 393 error("fcntl lock error", "", "fcntl");
mas01cr@30 394 }
mas01cr@30 395 }
mas01cr@30 396 }
mas01cr@30 397
mas01cr@30 398 void audioDB::release_lock(int fd) {
mas01cr@30 399 struct flock lock;
mas01cr@30 400 int status;
mas01cr@30 401
mas01cr@30 402 lock.l_type = F_UNLCK;
mas01cr@30 403 lock.l_whence = SEEK_SET;
mas01cr@30 404 lock.l_start = 0;
mas01cr@30 405 lock.l_len = 0;
mas01cr@30 406
mas01cr@30 407 status = fcntl(fd, F_SETLKW, &lock);
mas01cr@30 408
mas01cr@30 409 if (status)
mas01cr@32 410 error("fcntl unlock error", "", "fcntl");
mas01cr@30 411 }
mas01cr@30 412
mas01cr@120 413 /* Make a new database.
mas01cr@120 414
mas01cr@120 415 The database consists of:
mas01cr@120 416
mas01cr@120 417 * a header (see dbTableHeader struct definition);
mas01cr@120 418 * keyTable: list of keys of tracks;
mas01cr@120 419 * trackTable: Maps implicit feature index to a feature vector
mas01cr@120 420 matrix (sizes of tracks)
mas01cr@120 421 * featureTable: Lots of doubles;
mas01cr@120 422 * timesTable: time points for each feature vector;
mas01cr@120 423 * l2normTable: squared l2norms for each feature vector.
mas01cr@120 424 */
mas01cr@0 425 void audioDB::create(const char* dbName){
mas01cr@31 426 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01cr@32 427 error("Can't create database file", dbName, "open");
mas01cr@30 428 get_lock(dbfid, 1);
mas01cr@0 429
mas01cr@0 430 // go to the location corresponding to the last byte
mas01cr@129 431 if (lseek (dbfid, size - 1, SEEK_SET) == -1)
mas01cr@32 432 error("lseek error in db file", "", "lseek");
mas01cr@0 433
mas01cr@0 434 // write a dummy byte at the last location
mas01cr@0 435 if (write (dbfid, "", 1) != 1)
mas01cr@32 436 error("write error", "", "write");
mas01cr@0 437
mas01cr@0 438 // mmap the output file
mas01cr@60 439 if(verbosity) {
mas01cr@0 440 cerr << "header size:" << O2_HEADERSIZE << endl;
mas01cr@60 441 }
mas01cr@129 442 if ((db = (char*) mmap(0, size, PROT_READ | PROT_WRITE,
mas01cr@0 443 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@32 444 error("mmap error for creating database", "", "mmap");
mas01cr@0 445
mas01cr@0 446 dbH = new dbTableHeaderT();
mas01cr@0 447 assert(dbH);
mas01cr@0 448
mas01cr@140 449 unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE);
mas01cr@140 450
mas01cr@0 451 // Initialize header
mas01cr@108 452 dbH->magic = O2_MAGIC;
mas01cr@108 453 dbH->version = O2_FORMAT_VERSION;
mas01cr@108 454 dbH->numFiles = 0;
mas01cr@108 455 dbH->dim = 0;
mas01cr@108 456 dbH->flags = 0;
mas01cr@108 457 dbH->length = 0;
mas01cr@108 458 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8);
mas01cr@140 459 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8);
mas01cr@140 460 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8);
mas01cr@140 461 dbH->l2normTableOffset = ALIGN_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
mas01cr@140 462 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
mas01cr@142 463 dbH->powerTableOffset = ALIGN_DOWN(dbH->timesTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
mas01cr@129 464 dbH->dbSize = size;
mas01cr@0 465
mas01cr@0 466 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@60 467 if(verbosity) {
mas01cr@0 468 cerr << COM_CREATE << " " << dbName << endl;
mas01cr@60 469 }
mas01cr@0 470 }
mas01cr@0 471
mas01cr@0 472
mas01cr@0 473 void audioDB::drop(){
mas01cr@108 474 // FIXME: drop something? Should we even allow this?
mas01cr@0 475 }
mas01cr@0 476
mas01cr@120 477 void audioDB::initDBHeader(const char* dbName, bool forWrite) {
mas01cr@106 478 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
mas01cr@32 479 error("Can't open database file", dbName, "open");
mas01cr@106 480 }
mas01cr@120 481
mas01cr@30 482 get_lock(dbfid, forWrite);
mas01cr@0 483 // Get the database header info
mas01cr@0 484 dbH = new dbTableHeaderT();
mas01cr@0 485 assert(dbH);
mas01cr@0 486
mas01cr@106 487 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@106 488 error("error reading db header", dbName, "read");
mas01cr@106 489 }
mas01cr@0 490
mas01cr@108 491 if(dbH->magic == O2_OLD_MAGIC) {
mas01cr@108 492 // FIXME: if anyone ever complains, write the program to convert
mas01cr@108 493 // from the old audioDB format to the new...
mas01cr@108 494 error("database file has old O2 header", dbName);
mas01cr@108 495 }
mas01cr@0 496
mas01cr@106 497 if(dbH->magic != O2_MAGIC) {
mas01cr@106 498 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl;
mas01cr@108 499 error("database file has incorrect header", dbName);
mas01cr@108 500 }
mas01cr@108 501
mas01cr@108 502 if(dbH->version != O2_FORMAT_VERSION) {
mas01cr@108 503 error("database file has incorect version", dbName);
mas01cr@0 504 }
mas01cr@0 505
mas01cr@128 506 // FIXME: when changing file format version, remove this workaround.
mas01cr@128 507 if(dbH->dbSize == 0) {
mas01cr@128 508 dbH->dbSize = O2_DEFAULTDBSIZE;
mas01cr@128 509 }
mas01cr@128 510
mas01cr@120 511 // mmap the database file
mas01cr@128 512 if ((db = (char*) mmap(0, dbH->dbSize, PROT_READ | (forWrite ? PROT_WRITE : 0),
mas01cr@120 513 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
mas01cr@120 514 error("mmap error for initting tables of database", "", "mmap");
mas01cr@120 515
mas01cr@120 516 // Make some handy tables with correct types
mas01cr@120 517 fileTable = (char *) (db + dbH->fileTableOffset);
mas01cr@120 518 trackTable = (unsigned *) (db + dbH->trackTableOffset);
mas01cr@120 519 dataBuf = (double *) (db + dbH->dataOffset);
mas01cr@120 520 l2normTable = (double *) (db + dbH->l2normTableOffset);
mas01cr@120 521 timesTable = (double *) (db + dbH->timesTableOffset);
mas01cr@142 522 powerTable = (double *) (db + dbH->powerTableOffset);
mas01cr@120 523 }
mas01cr@120 524
mas01cr@120 525 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
mas01cr@120 526
mas01cr@120 527 initDBHeader(dbName, forWrite);
mas01cr@120 528
mas01cr@120 529 // open the input file
mas01cr@120 530 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) {
mas01cr@120 531 error("can't open input file for reading", inFile, "open");
mas01cr@120 532 }
mas01cr@120 533 // find size of input file
mas01cr@120 534 if (inFile && fstat(infid, &statbuf) < 0) {
mas01cr@120 535 error("fstat error finding size of input", inFile, "fstat");
mas01cr@120 536 }
mas01cr@0 537 if(inFile)
mas01cr@106 538 if(dbH->dim == 0 && dbH->length == 0) // empty database
mas01cr@106 539 // initialize with input dimensionality
mas01cr@106 540 read(infid, &dbH->dim, sizeof(unsigned));
mas01cr@0 541 else {
mas01cr@0 542 unsigned test;
mas01cr@106 543 read(infid, &test, sizeof(unsigned));
mas01cr@106 544 if(dbH->dim != test) {
mas01cr@106 545 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl;
mas01cr@0 546 error("feature dimensions do not match database table dimensions");
mas01cr@0 547 }
mas01cr@0 548 }
mas01cr@0 549
mas01cr@0 550 // mmap the input file
mas01cr@0 551 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 552 == (caddr_t) -1)
mas01cr@106 553 error("mmap error for input", inFile, "mmap");
mas01cr@0 554 }
mas01cr@0 555
mas01cr@0 556 void audioDB::insert(const char* dbName, const char* inFile){
mas01cr@0 557
mas01cr@27 558 initTables(dbName, 1, inFile);
mas01cr@0 559
mas01cr@0 560 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 561 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 562
mas01cr@146 563 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@146 564 error("Must use power with power-enabled database", dbName);
mas01cr@146 565
mas01cr@0 566 // Check that there is room for at least 1 more file
mas01cr@0 567 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
mas01cr@0 568 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 569
mas01cr@0 570 if(!key)
mas01cr@0 571 key=inFile;
mas01cr@0 572 // Linear scan of filenames check for pre-existing feature
mas01cr@0 573 unsigned alreadyInserted=0;
mas01cr@0 574 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 575 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
mas01cr@0 576 alreadyInserted=1;
mas01cr@0 577 break;
mas01cr@0 578 }
mas01cr@0 579
mas01cr@0 580 if(alreadyInserted){
mas01cr@60 581 if(verbosity) {
mas01cr@0 582 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
mas01cr@60 583 }
mas01cr@0 584 return;
mas01cr@0 585 }
mas01cr@0 586
mas01mc@18 587 // Make a track index table of features to file indexes
mas01cr@0 588 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 589 if(!numVectors){
mas01cr@60 590 if(verbosity) {
mas01cr@0 591 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
mas01cr@60 592 }
mas01cr@0 593 // CLEAN UP
mas01cr@0 594 munmap(indata,statbuf.st_size);
mas01cr@128 595 munmap(db,dbH->dbSize);
mas01cr@0 596 close(infid);
mas01cr@0 597 return;
mas01cr@0 598 }
mas01cr@0 599
mas01cr@0 600 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
mas01cr@0 601
mas01cr@0 602 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 603
mas01cr@0 604 // Check times status and insert times from file
mas01cr@0 605 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 606 double* timesdata=timesTable+timesoffset;
mas01cr@0 607 assert(timesdata+numVectors<l2normTable);
mas01cr@0 608 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 609
mas01cr@144 610 double *powerdata = powerTable + timesoffset;
mas01cr@144 611 insertPowerData(numVectors, powerfd, powerdata);
mas01cr@144 612
mas01cr@0 613 // Increment file count
mas01cr@0 614 dbH->numFiles++;
mas01cr@0 615
mas01cr@0 616 // Update Header information
mas01cr@0 617 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 618
mas01cr@0 619 // Copy the header back to the database
mas01cr@0 620 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 621
mas01mc@18 622 // Update track to file index map
mas01mc@18 623 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01mc@18 624 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 625
mas01cr@0 626 // Update the feature database
mas01cr@108 627 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 628
mas01cr@0 629 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 630 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@108 631 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 632
mas01cr@0 633 // Report status
mas01cr@0 634 status(dbName);
mas01cr@60 635 if(verbosity) {
mas01cr@0 636 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
mas01cr@0 637 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
mas01cr@60 638 }
mas01cr@0 639
mas01cr@0 640 // CLEAN UP
mas01cr@0 641 munmap(indata,statbuf.st_size);
mas01cr@0 642 close(infid);
mas01cr@0 643 }
mas01cr@0 644
mas01cr@0 645 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
mas01cr@0 646 unsigned numtimes=0;
mas01cr@0 647 if(usingTimes){
mas01cr@0 648 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
mas01cr@0 649 dbH->flags=dbH->flags|O2_FLAG_TIMES;
mas01cr@0 650 else if(!(dbH->flags&O2_FLAG_TIMES)){
mas01cr@0 651 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
mas01cr@0 652 usingTimes=0;
mas01cr@0 653 }
mas01cr@0 654
mas01cr@0 655 if(!timesFile->is_open()){
mas01cr@0 656 if(dbH->flags & O2_FLAG_TIMES){
mas01cr@0 657 munmap(indata,statbuf.st_size);
mas01cr@128 658 munmap(db,dbH->dbSize);
mas01cr@0 659 error("problem opening times file on timestamped database",timesFileName);
mas01cr@0 660 }
mas01cr@0 661 else{
mas01cr@0 662 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
mas01cr@0 663 usingTimes=0;
mas01cr@0 664 }
mas01cr@0 665 }
mas01cr@0 666
mas01cr@0 667 // Process time file
mas01cr@0 668 if(usingTimes){
mas01cr@0 669 do{
mas01cr@0 670 *timesFile>>*timesdata++;
mas01cr@0 671 if(timesFile->eof())
mas01cr@0 672 break;
mas01cr@0 673 numtimes++;
mas01cr@0 674 }while(!timesFile->eof() && numtimes<numVectors);
mas01cr@0 675 if(!timesFile->eof()){
mas01cr@0 676 double dummy;
mas01cr@0 677 do{
mas01cr@0 678 *timesFile>>dummy;
mas01cr@0 679 if(timesFile->eof())
mas01cr@0 680 break;
mas01cr@0 681 numtimes++;
mas01cr@0 682 }while(!timesFile->eof());
mas01cr@0 683 }
mas01cr@0 684 if(numtimes<numVectors || numtimes>numVectors+2){
mas01cr@0 685 munmap(indata,statbuf.st_size);
mas01cr@128 686 munmap(db,dbH->dbSize);
mas01cr@0 687 close(infid);
mas01cr@0 688 cerr << "expected " << numVectors << " found " << numtimes << endl;
mas01cr@0 689 error("Times file is incorrect length for features file",inFile);
mas01cr@0 690 }
mas01cr@60 691 if(verbosity>2) {
mas01cr@0 692 cerr << "numtimes: " << numtimes << endl;
mas01cr@60 693 }
mas01cr@0 694 }
mas01cr@0 695 }
mas01cr@0 696 }
mas01cr@0 697
mas01cr@144 698 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
mas01cr@144 699 if (usingPower) {
mas01cr@146 700 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@146 701 error("Cannot insert power data on non-power DB", dbName);
mas01cr@146 702 }
mas01cr@146 703
mas01cr@144 704 int one;
mas01cr@144 705 unsigned int count;
mas01cr@144 706
mas01cr@144 707 count = read(powerfd, &one, sizeof(unsigned int));
mas01cr@144 708 if (count != sizeof(unsigned int)) {
mas01cr@144 709 error("powerfd read failed", "int", "read");
mas01cr@144 710 }
mas01cr@144 711 if (one != 1) {
mas01cr@144 712 error("dimensionality of power file not 1", powerFileName);
mas01cr@144 713 }
mas01cr@144 714
mas01cr@144 715 // FIXME: should check that the powerfile is the right size for
mas01cr@144 716 // this. -- CSR, 2007-10-30
mas01cr@144 717 count = read(powerfd, powerdata, numVectors * sizeof(double));
mas01cr@144 718 if (count != numVectors * sizeof(double)) {
mas01cr@144 719 error("powerfd read failed", "double", "read");
mas01cr@144 720 }
mas01cr@144 721 }
mas01cr@144 722 }
mas01cr@144 723
mas01cr@120 724 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@0 725
mas01cr@120 726 initDBHeader(dbName, true);
mas01cr@0 727
mas01cr@0 728 if(!key)
mas01cr@0 729 key=inFile;
mas01cr@0 730 ifstream *filesIn = 0;
mas01cr@0 731 ifstream *keysIn = 0;
mas01cr@0 732 ifstream* thisTimesFile = 0;
mas01cr@145 733 int thispowerfd = 0;
mas01cr@0 734
mas01cr@0 735 if(!(filesIn = new ifstream(inFile)))
mas01cr@0 736 error("Could not open batch in file", inFile);
mas01cr@0 737 if(key && key!=inFile)
mas01cr@0 738 if(!(keysIn = new ifstream(key)))
mas01cr@0 739 error("Could not open batch key file",key);
mas01cr@0 740
mas01cr@0 741 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 742 error("Must use timestamps with timestamped database","use --times");
mas01cr@0 743
mas01cr@146 744 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
mas01cr@146 745 error("Must use power with power-enabled database", dbName);
mas01cr@146 746
mas01cr@0 747 unsigned totalVectors=0;
mas01cr@0 748 char *thisKey = new char[MAXSTR];
mas01cr@0 749 char *thisFile = new char[MAXSTR];
mas01cr@0 750 char *thisTimesFileName = new char[MAXSTR];
mas01cr@145 751 char *thisPowerFileName = new char[MAXSTR];
mas01cr@120 752
mas01cr@0 753 do{
mas01cr@0 754 filesIn->getline(thisFile,MAXSTR);
mas01cr@0 755 if(key && key!=inFile)
mas01cr@0 756 keysIn->getline(thisKey,MAXSTR);
mas01cr@0 757 else
mas01cr@0 758 thisKey = thisFile;
mas01cr@0 759 if(usingTimes)
mas01cr@0 760 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@145 761 if(usingPower)
mas01cr@145 762 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@0 763
mas01cr@0 764 if(filesIn->eof())
mas01cr@0 765 break;
mas01cr@0 766
mas01cr@0 767 // open the input file
mas01cr@0 768 if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0)
mas01cr@32 769 error("can't open feature file for reading", thisFile, "open");
mas01cr@0 770
mas01cr@0 771 // find size of input file
mas01cr@0 772 if (thisFile && fstat (infid,&statbuf) < 0)
mas01cr@32 773 error("fstat error finding size of input", "", "fstat");
mas01cr@0 774
mas01cr@0 775 // Check that there is room for at least 1 more file
mas01cr@0 776 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
mas01cr@0 777 error("No more room in database","insert failed: reason database is full.");
mas01cr@0 778
mas01cr@0 779 if(thisFile)
mas01cr@0 780 if(dbH->dim==0 && dbH->length==0) // empty database
mas01cr@0 781 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
mas01cr@0 782 else {
mas01cr@0 783 unsigned test;
mas01cr@0 784 read(infid,&test,sizeof(unsigned));
mas01cr@0 785 if(dbH->dim!=test){
mas01cr@0 786 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
mas01cr@0 787 error("feature dimensions do not match database table dimensions");
mas01cr@0 788 }
mas01cr@0 789 }
mas01cr@0 790
mas01cr@0 791 // mmap the input file
mas01cr@0 792 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
mas01cr@0 793 == (caddr_t) -1)
mas01cr@32 794 error("mmap error for input", "", "mmap");
mas01cr@0 795
mas01cr@0 796
mas01cr@0 797 // Linear scan of filenames check for pre-existing feature
mas01cr@0 798 unsigned alreadyInserted=0;
mas01cr@0 799
mas01cr@0 800 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 801 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
mas01cr@0 802 alreadyInserted=1;
mas01cr@0 803 break;
mas01cr@0 804 }
mas01cr@0 805
mas01cr@0 806 if(alreadyInserted){
mas01cr@60 807 if(verbosity) {
mas01cr@0 808 cerr << "Warning: key already exists in database:" << thisKey << endl;
mas01cr@60 809 }
mas01cr@0 810 }
mas01cr@0 811 else{
mas01cr@0 812
mas01mc@18 813 // Make a track index table of features to file indexes
mas01cr@0 814 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 815 if(!numVectors){
mas01cr@60 816 if(verbosity) {
mas01cr@0 817 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
mas01cr@60 818 }
mas01cr@0 819 }
mas01cr@145 820 else{
mas01cr@0 821 if(usingTimes){
mas01cr@0 822 if(timesFile->eof())
mas01cr@0 823 error("not enough timestamp files in timesList");
mas01cr@0 824 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
mas01cr@0 825 if(!thisTimesFile->is_open())
mas01cr@0 826 error("Cannot open timestamp file",thisTimesFileName);
mas01cr@0 827 unsigned insertoffset=dbH->length;
mas01cr@0 828 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
mas01cr@0 829 double* timesdata=timesTable+timesoffset;
mas01cr@0 830 assert(timesdata+numVectors<l2normTable);
mas01cr@0 831 insertTimeStamps(numVectors,thisTimesFile,timesdata);
mas01cr@0 832 if(thisTimesFile)
mas01cr@0 833 delete thisTimesFile;
mas01cr@0 834 }
mas01cr@145 835
mas01cr@145 836 if (usingPower) {
mas01cr@145 837 if(powerFile->eof()) {
mas01cr@145 838 error("not enough power files in powerList", powerFileName);
mas01cr@145 839 }
mas01cr@145 840 thispowerfd = open(thisPowerFileName, O_RDONLY);
mas01cr@145 841 if (thispowerfd < 0) {
mas01cr@145 842 error("failed to open power file", thisPowerFileName);
mas01cr@145 843 }
mas01cr@145 844 unsigned insertoffset = dbH->length;
mas01cr@145 845 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
mas01cr@145 846 double *powerdata = powerTable + poweroffset;
mas01cr@145 847 insertPowerData(numVectors, thispowerfd, powerdata);
mas01cr@145 848 if (0 < thispowerfd) {
mas01cr@145 849 close(thispowerfd);
mas01cr@145 850 }
mas01cr@145 851 }
mas01cr@0 852 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
mas01cr@0 853
mas01cr@0 854 unsigned insertoffset = dbH->length;// Store current state
mas01cr@0 855
mas01cr@0 856 // Increment file count
mas01cr@0 857 dbH->numFiles++;
mas01cr@0 858
mas01cr@0 859 // Update Header information
mas01cr@0 860 dbH->length+=(statbuf.st_size-sizeof(int));
mas01cr@0 861 // Copy the header back to the database
mas01cr@0 862 memcpy (db, dbH, sizeof(dbTableHeaderT));
mas01cr@0 863
mas01mc@18 864 // Update track to file index map
mas01mc@18 865 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
mas01mc@18 866 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
mas01cr@0 867
mas01cr@0 868 // Update the feature database
mas01cr@108 869 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
mas01cr@0 870
mas01cr@0 871 // Norm the vectors on input if the database is already L2 normed
mas01cr@0 872 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@108 873 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append
mas01cr@0 874
mas01cr@0 875 totalVectors+=numVectors;
mas01cr@0 876 }
mas01cr@0 877 }
mas01cr@0 878 // CLEAN UP
mas01cr@0 879 munmap(indata,statbuf.st_size);
mas01cr@0 880 close(infid);
mas01cr@0 881 }while(!filesIn->eof());
mas01mc@12 882
mas01cr@60 883 if(verbosity) {
mas01cr@0 884 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
mas01cr@0 885 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
mas01cr@60 886 }
mas01cr@0 887
mas01cr@0 888 // Report status
mas01cr@0 889 status(dbName);
mas01cr@0 890 }
mas01cr@0 891
mas01cr@76 892 // FIXME: this can't propagate the sequence length argument (used for
mas01cr@76 893 // dudCount). See adb__status() definition for the other half of
mas01cr@76 894 // this. -- CSR, 2007-10-01
mas01cr@0 895 void audioDB::ws_status(const char*dbName, char* hostport){
mas01cr@0 896 struct soap soap;
mas01cr@133 897 adb__statusResponse adbStatusResponse;
mas01cr@0 898
mas01cr@0 899 // Query an existing adb database
mas01cr@0 900 soap_init(&soap);
mas01cr@133 901 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) {
mas01cr@133 902 cout << "numFiles = " << adbStatusResponse.result.numFiles << endl;
mas01cr@133 903 cout << "dim = " << adbStatusResponse.result.dim << endl;
mas01cr@133 904 cout << "length = " << adbStatusResponse.result.length << endl;
mas01cr@133 905 cout << "dudCount = " << adbStatusResponse.result.dudCount << endl;
mas01cr@133 906 cout << "nullCount = " << adbStatusResponse.result.nullCount << endl;
mas01cr@133 907 cout << "flags = " << adbStatusResponse.result.flags << endl;
mas01cr@76 908 } else {
mas01cr@0 909 soap_print_fault(&soap,stderr);
mas01cr@76 910 }
mas01cr@0 911
mas01cr@0 912 soap_destroy(&soap);
mas01cr@0 913 soap_end(&soap);
mas01cr@0 914 soap_done(&soap);
mas01cr@0 915 }
mas01cr@0 916
mas01mc@18 917 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
mas01cr@0 918 struct soap soap;
mas01cr@133 919 adb__queryResponse adbQueryResponse;
mas01cr@0 920
mas01cr@0 921 soap_init(&soap);
mas01cr@0 922 if(soap_call_adb__query(&soap,hostport,NULL,
mas01mc@18 923 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
mas01cr@133 924 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){
mas01cr@133 925 //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl;
mas01cr@133 926 for(int i=0; i<adbQueryResponse.result.__sizeRlist; i++)
mas01cr@133 927 std::cout << adbQueryResponse.result.Rlist[i] << " " << adbQueryResponse.result.Dist[i]
mas01cr@133 928 << " " << adbQueryResponse.result.Qpos[i] << " " << adbQueryResponse.result.Spos[i] << std::endl;
mas01cr@0 929 }
mas01cr@0 930 else
mas01cr@0 931 soap_print_fault(&soap,stderr);
mas01cr@0 932
mas01cr@0 933 soap_destroy(&soap);
mas01cr@0 934 soap_end(&soap);
mas01cr@0 935 soap_done(&soap);
mas01cr@0 936
mas01cr@0 937 }
mas01cr@0 938
mas01cr@0 939
mas01cr@133 940 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 941 if(!dbH)
mas01cr@27 942 initTables(dbName, 0, 0);
mas01cr@0 943
mas01cr@0 944 unsigned dudCount=0;
mas01cr@0 945 unsigned nullCount=0;
mas01cr@0 946 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 947 if(trackTable[k]<sequenceLength){
mas01cr@0 948 dudCount++;
mas01mc@18 949 if(!trackTable[k])
mas01cr@76 950 nullCount++;
mas01cr@0 951 }
mas01cr@0 952 }
mas01cr@76 953
mas01cr@133 954 if(adbStatusResponse == 0) {
mas01cr@76 955
mas01cr@76 956 // Update Header information
mas01cr@76 957 cout << "num files:" << dbH->numFiles << endl;
mas01cr@76 958 cout << "data dim:" << dbH->dim <<endl;
mas01cr@76 959 if(dbH->dim>0){
mas01cr@76 960 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
mas01cr@108 961 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
mas01cr@76 962 }
mas01cr@108 963 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
mas01cr@108 964 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01cr@108 965 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
mas01cr@76 966 cout << "flags:" << dbH->flags << endl;
mas01cr@76 967
mas01cr@76 968 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
mas01cr@76 969 } else {
mas01cr@133 970 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 971 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 972 adbStatusResponse->result.length = dbH->length;
mas01cr@133 973 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 974 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 975 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 976 }
mas01cr@0 977 }
mas01cr@0 978
mas01cr@0 979 void audioDB::dump(const char* dbName){
mas01cr@131 980 if(!dbH) {
mas01cr@27 981 initTables(dbName, 0, 0);
mas01mc@17 982 }
mas01cr@0 983
mas01cr@131 984 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
mas01cr@131 985 error("error making output directory", output, "mkdir");
mas01cr@131 986 }
mas01cr@131 987
mas01cr@131 988 char *cwd = new char[PATH_MAX];
mas01cr@131 989
mas01cr@131 990 if ((getcwd(cwd, PATH_MAX)) == 0) {
mas01cr@131 991 error("error getting working directory", "", "getcwd");
mas01cr@131 992 }
mas01cr@131 993
mas01cr@131 994 if((chdir(output)) < 0) {
mas01cr@131 995 error("error changing working directory", output, "chdir");
mas01cr@131 996 }
mas01cr@131 997
mas01cr@153 998 int fLfd, tLfd = 0, pLfd = 0, kLfd;
mas01cr@153 999 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile;
mas01cr@131 1000
mas01cr@131 1001 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 1002 error("error creating featureList file", "featureList.txt", "open");
mas01cr@131 1003 }
mas01cr@153 1004
mas01cr@131 1005 int times = dbH->flags & O2_FLAG_TIMES;
mas01cr@131 1006 if (times) {
mas01cr@131 1007 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 1008 error("error creating timesList file", "timesList.txt", "open");
mas01cr@131 1009 }
mas01cr@131 1010 }
mas01cr@153 1011
mas01cr@153 1012 int power = dbH->flags & O2_FLAG_POWER;
mas01cr@153 1013 if (power) {
mas01cr@153 1014 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@153 1015 error("error creating powerList file", "powerList.txt", "open");
mas01cr@153 1016 }
mas01cr@153 1017 }
mas01cr@153 1018
mas01cr@131 1019 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 1020 error("error creating keyList file", "keyList.txt", "open");
mas01cr@131 1021 }
mas01cr@131 1022
mas01cr@131 1023 /* can these fail? I sincerely hope not. */
mas01cr@131 1024 fLFile = fdopen(fLfd, "w");
mas01cr@131 1025 if (times) {
mas01cr@131 1026 tLFile = fdopen(tLfd, "w");
mas01cr@131 1027 }
mas01cr@153 1028 if (power) {
mas01cr@153 1029 pLFile = fdopen(pLfd, "w");
mas01cr@153 1030 }
mas01cr@131 1031 kLFile = fdopen(kLfd, "w");
mas01cr@131 1032
mas01cr@131 1033 char *fName = new char[256];
mas01cr@153 1034 int ffd, pfd;
mas01cr@131 1035 FILE *tFile;
mas01cr@131 1036 unsigned pos = 0;
mas01cr@131 1037 for(unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@131 1038 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE);
mas01cr@131 1039 snprintf(fName, 256, "%05d.features", k);
mas01cr@131 1040 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@131 1041 error("error creating feature file", fName, "open");
mas01cr@131 1042 }
mas01cr@131 1043 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
mas01cr@131 1044 error("error writing dimensions", fName, "write");
mas01cr@131 1045 }
mas01cr@131 1046
mas01cr@131 1047 if ((write(ffd, dataBuf + pos * dbH->dim, trackTable[k] * dbH->dim * sizeof(double))) < 0) {
mas01cr@131 1048 error("error writing data", fName, "write");
mas01cr@131 1049 }
mas01cr@131 1050 fprintf(fLFile, "%s\n", fName);
mas01cr@131 1051 close(ffd);
mas01cr@131 1052
mas01cr@131 1053 if(times) {
mas01cr@131 1054 snprintf(fName, 256, "%05d.times", k);
mas01cr@131 1055 tFile = fopen(fName, "w");
mas01cr@131 1056 for(unsigned i = 0; i < trackTable[k]; i++) {
mas01cr@131 1057 // KLUDGE: specifying 16 digits of precision after the decimal
mas01cr@131 1058 // point is (but check this!) sufficient to uniquely identify
mas01cr@131 1059 // doubles; however, that will cause ugliness, as that's
mas01cr@131 1060 // vastly too many for most values of interest. Moving to %a
mas01cr@131 1061 // here and scanf() in the timesFile reading might fix this.
mas01cr@131 1062 // -- CSR, 2007-10-19
mas01cr@131 1063 fprintf(tFile, "%.16e\n", *(timesTable + pos + i));
mas01cr@131 1064 }
mas01cr@131 1065 fprintf(tLFile, "%s\n", fName);
mas01cr@131 1066 }
mas01cr@131 1067
mas01cr@153 1068 if (power) {
mas01cr@153 1069 uint32_t one = 1;
mas01cr@153 1070 snprintf(fName, 256, "%05d.power", k);
mas01cr@153 1071 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@153 1072 error("error creating power file", fName, "open");
mas01cr@153 1073 }
mas01cr@153 1074 if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
mas01cr@153 1075 error("error writing one", fName, "write");
mas01cr@153 1076 }
mas01cr@153 1077 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
mas01cr@153 1078 error("error writing data", fName, "write");
mas01cr@153 1079 }
mas01cr@153 1080 fprintf(pLFile, "%s\n", fName);
mas01cr@153 1081 close(pfd);
mas01cr@153 1082 }
mas01cr@153 1083
mas01cr@131 1084 pos += trackTable[k];
mas01cr@131 1085 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
mas01cr@131 1086 }
mas01cr@131 1087
mas01cr@131 1088 FILE *scriptFile;
mas01cr@131 1089 scriptFile = fopen("restore.sh", "w");
mas01cr@131 1090 fprintf(scriptFile, "\
mas01cr@131 1091 #! /bin/sh\n\
mas01cr@131 1092 #\n\
mas01cr@131 1093 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
mas01cr@131 1094 \n\
mas01cr@131 1095 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
mas01cr@131 1096 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
mas01cr@131 1097 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000);
mas01cr@131 1098 if(dbH->flags & O2_FLAG_L2NORM) {
mas01cr@131 1099 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
mas01cr@131 1100 }
mas01cr@153 1101 if(power) {
mas01cr@153 1102 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
mas01cr@153 1103 }
mas01cr@131 1104 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
mas01cr@131 1105 if(times) {
mas01cr@131 1106 fprintf(scriptFile, " -T timesList.txt");
mas01cr@131 1107 }
mas01cr@153 1108 if(power) {
mas01cr@153 1109 fprintf(scriptFile, " -W powerList.txt");
mas01cr@153 1110 }
mas01cr@131 1111 fprintf(scriptFile, "\n");
mas01cr@131 1112 fclose(scriptFile);
mas01cr@131 1113
mas01cr@131 1114 if((chdir(cwd)) < 0) {
mas01cr@131 1115 error("error changing working directory", cwd, "chdir");
mas01cr@131 1116 }
mas01cr@131 1117
mas01cr@131 1118 fclose(fLFile);
mas01cr@131 1119 if(times) {
mas01cr@131 1120 fclose(tLFile);
mas01cr@131 1121 }
mas01cr@153 1122 if(power) {
mas01cr@153 1123 fclose(pLFile);
mas01cr@153 1124 }
mas01cr@131 1125 fclose(kLFile);
mas01cr@131 1126 delete[] fName;
mas01cr@131 1127
mas01cr@0 1128 status(dbName);
mas01cr@0 1129 }
mas01cr@0 1130
mas01cr@0 1131 void audioDB::l2norm(const char* dbName){
mas01cr@50 1132 initTables(dbName, true, 0);
mas01cr@0 1133 if(dbH->length>0){
mas01cr@0 1134 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@0 1135 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 1136 }
mas01cr@0 1137 // Update database flags
mas01cr@0 1138 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 1139 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 1140 }
mas01cr@146 1141
mas01cr@146 1142 void audioDB::power_flag(const char *dbName) {
mas01cr@146 1143 initTables(dbName, true, 0);
mas01cr@146 1144 if (dbH->length > 0) {
mas01cr@146 1145 error("cannot turn on power storage for non-empty database", dbName);
mas01cr@146 1146 }
mas01cr@146 1147 dbH->flags |= O2_FLAG_POWER;
mas01cr@146 1148 memcpy(db, dbH, O2_HEADERSIZE);
mas01cr@146 1149 }
mas01cr@0 1150
mas01cr@147 1151 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@147 1152 if (use_absolute_threshold) {
mas01cr@147 1153 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@147 1154 return false;
mas01cr@147 1155 }
mas01cr@147 1156 }
mas01cr@147 1157 if (use_relative_threshold) {
mas01cr@147 1158 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@147 1159 return false;
mas01cr@147 1160 }
mas01cr@147 1161 }
mas01cr@147 1162 return true;
mas01cr@147 1163 }
mas01cr@147 1164
mas01cr@0 1165
mas01cr@133 1166 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@0 1167 switch(queryType){
mas01cr@105 1168 case O2_POINT_QUERY:
mas01cr@133 1169 pointQuery(dbName, inFile, adbQueryResponse);
mas01cr@0 1170 break;
mas01cr@105 1171 case O2_SEQUENCE_QUERY:
mas01mc@17 1172 if(radius==0)
mas01cr@133 1173 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
mas01mc@17 1174 else
mas01cr@133 1175 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
mas01cr@0 1176 break;
mas01cr@105 1177 case O2_TRACK_QUERY:
mas01cr@133 1178 trackPointQuery(dbName, inFile, adbQueryResponse);
mas01cr@0 1179 break;
mas01cr@0 1180 default:
mas01cr@0 1181 error("unrecognized queryType in query()");
mas01cr@0 1182
mas01cr@0 1183 }
mas01cr@0 1184 }
mas01cr@0 1185
mas01cr@0 1186 //return ordinal position of key in keyTable
mas01cr@0 1187 unsigned audioDB::getKeyPos(char* key){
mas01cr@0 1188 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@0 1189 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@0 1190 return k;
mas01cr@0 1191 error("Key not found",key);
mas01cr@0 1192 return O2_ERR_KEYNOTFOUND;
mas01cr@0 1193 }
mas01cr@0 1194
mas01cr@0 1195 // Basic point query engine
mas01cr@133 1196 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@0 1197
mas01cr@27 1198 initTables(dbName, 0, inFile);
mas01cr@0 1199
mas01cr@0 1200 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1201 // we use stdout in this stub version
mas01cr@0 1202 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1203
mas01cr@0 1204 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1205 double* data = dataBuf;
mas01cr@0 1206 double* queryCopy = 0;
mas01cr@0 1207
mas01cr@0 1208 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1209 // Make a copy of the query
mas01cr@0 1210 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1211 qNorm = new double[numVectors];
mas01cr@0 1212 assert(queryCopy&&qNorm);
mas01cr@0 1213 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1214 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1215 query = queryCopy;
mas01cr@0 1216 }
mas01cr@0 1217
mas01cr@0 1218 // Make temporary dynamic memory for results
mas01cr@0 1219 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@0 1220 double distances[pointNN];
mas01cr@0 1221 unsigned qIndexes[pointNN];
mas01cr@0 1222 unsigned sIndexes[pointNN];
mas01cr@0 1223 for(unsigned k=0; k<pointNN; k++){
mas01cr@62 1224 distances[k]=-DBL_MAX;
mas01cr@0 1225 qIndexes[k]=~0;
mas01cr@0 1226 sIndexes[k]=~0;
mas01cr@0 1227 }
mas01cr@0 1228
mas01cr@0 1229 unsigned j=numVectors;
mas01cr@0 1230 unsigned k,l,n;
mas01cr@0 1231 double thisDist;
mas01cr@0 1232
mas01cr@0 1233 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
mas01cr@0 1234 double meanQdur = 0;
mas01cr@0 1235 double* timesdata = 0;
mas01cr@0 1236 double* dbdurs = 0;
mas01cr@0 1237
mas01cr@0 1238 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1239 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1240 usingTimes=0;
mas01cr@0 1241 }
mas01cr@0 1242
mas01cr@0 1243 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1244 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1245
mas01cr@0 1246 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1247 timesdata = new double[numVectors];
mas01cr@0 1248 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1249 // Calculate durations of points
mas01cr@0 1250 for(k=0; k<numVectors-1; k++){
mas01cr@0 1251 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1252 meanQdur+=timesdata[k];
mas01cr@0 1253 }
mas01cr@0 1254 meanQdur/=k;
mas01cr@0 1255 // Individual exhaustive timepoint durations
mas01cr@0 1256 dbdurs = new double[totalVecs];
mas01cr@0 1257 for(k=0; k<totalVecs-1; k++)
mas01cr@0 1258 dbdurs[k]=timesTable[k+1]-timesTable[k];
mas01cr@0 1259 j--; // decrement vector counter by one
mas01cr@0 1260 }
mas01cr@0 1261
mas01cr@0 1262 if(usingQueryPoint)
mas01cr@0 1263 if(queryPoint>numVectors-1)
mas01cr@0 1264 error("queryPoint > numVectors in query");
mas01cr@0 1265 else{
mas01cr@60 1266 if(verbosity>1) {
mas01cr@0 1267 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1268 }
mas01cr@0 1269 query=query+queryPoint*dbH->dim;
mas01cr@0 1270 numVectors=queryPoint+1;
mas01cr@0 1271 j=1;
mas01cr@0 1272 }
mas01cr@0 1273
mas01cr@0 1274 gettimeofday(&tv1, NULL);
mas01cr@0 1275 while(j--){ // query
mas01cr@0 1276 data=dataBuf;
mas01cr@0 1277 k=totalVecs; // number of database vectors
mas01cr@0 1278 while(k--){ // database
mas01cr@0 1279 thisDist=0;
mas01cr@0 1280 l=dbH->dim;
mas01cr@0 1281 double* q=query;
mas01cr@0 1282 while(l--)
mas01cr@0 1283 thisDist+=*q++**data++;
mas01cr@0 1284 if(!usingTimes ||
mas01cr@0 1285 (usingTimes
mas01cr@0 1286 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
mas01cr@0 1287 n=pointNN;
mas01cr@0 1288 while(n--){
mas01cr@0 1289 if(thisDist>=distances[n]){
mas01cr@0 1290 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1291 // Copy all values above up the queue
mas01cr@0 1292 for( l=pointNN-1 ; l >= n+1 ; l--){
mas01cr@0 1293 distances[l]=distances[l-1];
mas01cr@0 1294 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1295 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1296 }
mas01cr@0 1297 distances[n]=thisDist;
mas01cr@0 1298 qIndexes[n]=numVectors-j-1;
mas01cr@0 1299 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
mas01cr@0 1300 break;
mas01cr@0 1301 }
mas01cr@0 1302 }
mas01cr@0 1303 else
mas01cr@0 1304 break;
mas01cr@0 1305 }
mas01cr@0 1306 }
mas01cr@0 1307 }
mas01cr@0 1308 // Move query pointer to next query point
mas01cr@0 1309 query+=dbH->dim;
mas01cr@0 1310 }
mas01cr@0 1311
mas01cr@0 1312 gettimeofday(&tv2, NULL);
mas01cr@60 1313 if(verbosity>1) {
mas01cr@0 1314 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@60 1315 }
mas01cr@0 1316
mas01cr@133 1317 if(adbQueryResponse==0){
mas01cr@0 1318 // Output answer
mas01cr@0 1319 // Loop over nearest neighbours
mas01cr@0 1320 for(k=0; k < pointNN; k++){
mas01cr@0 1321 // Scan for key
mas01mc@18 1322 unsigned cumTrack=0;
mas01cr@0 1323 for(l=0 ; l<dbH->numFiles; l++){
mas01mc@18 1324 cumTrack+=trackTable[l];
mas01mc@18 1325 if(sIndexes[k]<cumTrack){
mas01cr@0 1326 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
mas01mc@18 1327 << sIndexes[k]+trackTable[l]-cumTrack << endl;
mas01cr@0 1328 break;
mas01cr@0 1329 }
mas01cr@0 1330 }
mas01cr@0 1331 }
mas01cr@0 1332 }
mas01cr@0 1333 else{ // Process Web Services Query
mas01cr@89 1334 int listLen;
mas01cr@89 1335 for(k = 0; k < pointNN; k++) {
mas01cr@89 1336 if(distances[k] == -DBL_MAX)
mas01cr@89 1337 break;
mas01cr@89 1338 }
mas01cr@89 1339 listLen = k;
mas01cr@89 1340
mas01cr@133 1341 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 1342 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 1343 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 1344 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 1345 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 1346 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 1347 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 1348 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 1349 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 1350 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 1351 adbQueryResponse->result.Dist[k]=distances[k];
mas01cr@133 1352 adbQueryResponse->result.Qpos[k]=qIndexes[k];
mas01mc@18 1353 unsigned cumTrack=0;
mas01cr@0 1354 for(l=0 ; l<dbH->numFiles; l++){
mas01mc@18 1355 cumTrack+=trackTable[l];
mas01mc@18 1356 if(sIndexes[k]<cumTrack){
mas01cr@133 1357 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
mas01cr@0 1358 break;
mas01cr@0 1359 }
mas01cr@0 1360 }
mas01cr@133 1361 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
mas01cr@0 1362 }
mas01cr@0 1363 }
mas01cr@0 1364
mas01cr@0 1365 // Clean up
mas01cr@0 1366 if(queryCopy)
mas01cr@0 1367 delete queryCopy;
mas01cr@0 1368 if(qNorm)
mas01cr@0 1369 delete qNorm;
mas01cr@0 1370 if(timesdata)
mas01cr@0 1371 delete timesdata;
mas01cr@0 1372 if(dbdurs)
mas01cr@0 1373 delete dbdurs;
mas01cr@0 1374 }
mas01cr@0 1375
mas01mc@18 1376 // trackPointQuery
mas01mc@18 1377 // return the trackNN closest tracks to the query track
mas01mc@18 1378 // uses average of pointNN points per track
mas01cr@133 1379 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@27 1380 initTables(dbName, 0, inFile);
mas01cr@0 1381
mas01cr@0 1382 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1383 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1384 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1385 double* data = dataBuf;
mas01cr@0 1386 double* queryCopy = 0;
mas01cr@0 1387
mas01cr@0 1388 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@0 1389 // Make a copy of the query
mas01cr@0 1390 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1391 qNorm = new double[numVectors];
mas01cr@0 1392 assert(queryCopy&&qNorm);
mas01cr@0 1393 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1394 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1395 query = queryCopy;
mas01cr@0 1396 }
mas01cr@0 1397
mas01cr@0 1398 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 1399 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1400
mas01cr@0 1401 // Make temporary dynamic memory for results
mas01mc@18 1402 double trackDistances[trackNN];
mas01mc@18 1403 unsigned trackIDs[trackNN];
mas01mc@18 1404 unsigned trackQIndexes[trackNN];
mas01mc@18 1405 unsigned trackSIndexes[trackNN];
mas01cr@0 1406
mas01cr@0 1407 double distances[pointNN];
mas01cr@0 1408 unsigned qIndexes[pointNN];
mas01cr@0 1409 unsigned sIndexes[pointNN];
mas01cr@0 1410
mas01cr@0 1411 unsigned j=numVectors; // number of query points
mas01mc@18 1412 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
mas01cr@0 1413 double thisDist;
mas01cr@0 1414
mas01cr@0 1415 for(k=0; k<pointNN; k++){
mas01cr@66 1416 distances[k]=-DBL_MAX;
mas01cr@0 1417 qIndexes[k]=~0;
mas01cr@0 1418 sIndexes[k]=~0;
mas01cr@0 1419 }
mas01cr@0 1420
mas01mc@18 1421 for(k=0; k<trackNN; k++){
mas01cr@66 1422 trackDistances[k]=-DBL_MAX;
mas01mc@18 1423 trackQIndexes[k]=~0;
mas01mc@18 1424 trackSIndexes[k]=~0;
mas01mc@18 1425 trackIDs[k]=~0;
mas01cr@0 1426 }
mas01cr@0 1427
mas01cr@0 1428 double meanQdur = 0;
mas01cr@0 1429 double* timesdata = 0;
mas01cr@0 1430 double* meanDBdur = 0;
mas01cr@0 1431
mas01cr@0 1432 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1433 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1434 usingTimes=0;
mas01cr@0 1435 }
mas01cr@0 1436
mas01cr@0 1437 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1438 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1439
mas01cr@0 1440 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1441 timesdata = new double[numVectors];
mas01cr@0 1442 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1443 // Calculate durations of points
mas01cr@0 1444 for(k=0; k<numVectors-1; k++){
mas01cr@0 1445 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1446 meanQdur+=timesdata[k];
mas01cr@0 1447 }
mas01cr@0 1448 meanQdur/=k;
mas01cr@0 1449 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1450 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1451 meanDBdur[k]=0.0;
mas01mc@18 1452 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1453 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1454 meanDBdur[k]/=j;
mas01cr@0 1455 }
mas01cr@0 1456 }
mas01cr@0 1457
mas01cr@0 1458 if(usingQueryPoint)
mas01cr@0 1459 if(queryPoint>numVectors-1)
mas01cr@0 1460 error("queryPoint > numVectors in query");
mas01cr@0 1461 else{
mas01cr@60 1462 if(verbosity>1) {
mas01cr@0 1463 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1464 }
mas01cr@0 1465 query=query+queryPoint*dbH->dim;
mas01cr@0 1466 numVectors=queryPoint+1;
mas01cr@0 1467 }
mas01cr@0 1468
mas01mc@18 1469 // build track offset table
mas01mc@18 1470 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 1471 unsigned cumTrack=0;
mas01mc@18 1472 unsigned trackIndexOffset;
mas01cr@0 1473 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 1474 trackOffsetTable[k]=cumTrack;
mas01mc@18 1475 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1476 }
mas01cr@0 1477
mas01cr@0 1478 char nextKey[MAXSTR];
mas01cr@0 1479
mas01cr@0 1480 gettimeofday(&tv1, NULL);
mas01cr@0 1481
mas01mc@18 1482 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01mc@18 1483 if(trackFile){
mas01mc@18 1484 if(!trackFile->eof()){
mas01mc@18 1485 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 1486 track=getKeyPos(nextKey);
mas01cr@0 1487 }
mas01cr@0 1488 else
mas01cr@0 1489 break;
mas01cr@0 1490 }
mas01mc@18 1491 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 1492 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@60 1493 if(verbosity>7) {
mas01mc@18 1494 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 1495 }
mas01cr@0 1496
mas01cr@0 1497 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@0 1498 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
mas01cr@0 1499 else
mas01cr@0 1500 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
mas01cr@0 1501 if(usingQueryPoint)
mas01cr@0 1502 j=1;
mas01cr@0 1503 else
mas01cr@0 1504 j=numVectors;
mas01cr@0 1505 while(j--){
mas01mc@18 1506 k=trackTable[track]; // number of vectors in track
mas01mc@18 1507 data=dataBuf+trackOffset; // data for track
mas01cr@0 1508 while(k--){
mas01cr@0 1509 thisDist=0;
mas01cr@0 1510 l=dbH->dim;
mas01cr@0 1511 double* q=query;
mas01cr@0 1512 while(l--)
mas01cr@0 1513 thisDist+=*q++**data++;
mas01cr@0 1514 if(!usingTimes ||
mas01cr@0 1515 (usingTimes
mas01mc@18 1516 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1517 n=pointNN;
mas01cr@0 1518 while(n--){
mas01cr@0 1519 if(thisDist>=distances[n]){
mas01cr@0 1520 if((n==0 || thisDist<=distances[n-1])){
mas01cr@0 1521 // Copy all values above up the queue
mas01cr@0 1522 for( l=pointNN-1 ; l > n ; l--){
mas01cr@0 1523 distances[l]=distances[l-1];
mas01cr@0 1524 qIndexes[l]=qIndexes[l-1];
mas01cr@0 1525 sIndexes[l]=sIndexes[l-1];
mas01cr@0 1526 }
mas01cr@0 1527 distances[n]=thisDist;
mas01cr@0 1528 qIndexes[n]=numVectors-j-1;
mas01mc@18 1529 sIndexes[n]=trackTable[track]-k-1;
mas01cr@0 1530 break;
mas01cr@0 1531 }
mas01cr@0 1532 }
mas01cr@0 1533 else
mas01cr@0 1534 break;
mas01cr@0 1535 }
mas01cr@0 1536 }
mas01mc@18 1537 } // track
mas01cr@0 1538 // Move query pointer to next query point
mas01cr@0 1539 query+=dbH->dim;
mas01cr@0 1540 } // query
mas01mc@18 1541 // Take the average of this track's distance
mas01mc@18 1542 // Test the track distances
mas01cr@0 1543 thisDist=0;
mas01cr@66 1544 for (n = 0; n < pointNN; n++) {
mas01cr@66 1545 if (distances[n] == -DBL_MAX) break;
mas01cr@66 1546 thisDist += distances[n];
mas01cr@66 1547 }
mas01cr@66 1548 thisDist /= n;
mas01cr@66 1549
mas01mc@18 1550 n=trackNN;
mas01cr@0 1551 while(n--){
mas01mc@18 1552 if(thisDist>=trackDistances[n]){
mas01mc@18 1553 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@0 1554 // Copy all values above up the queue
mas01cr@74 1555 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 1556 trackDistances[l]=trackDistances[l-1];
mas01mc@18 1557 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 1558 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 1559 trackIDs[l]=trackIDs[l-1];
mas01cr@0 1560 }
mas01mc@18 1561 trackDistances[n]=thisDist;
mas01mc@18 1562 trackQIndexes[n]=qIndexes[0];
mas01mc@18 1563 trackSIndexes[n]=sIndexes[0];
mas01mc@18 1564 trackIDs[n]=track;
mas01cr@0 1565 break;
mas01cr@0 1566 }
mas01cr@0 1567 }
mas01cr@0 1568 else
mas01cr@0 1569 break;
mas01cr@0 1570 }
mas01cr@0 1571 for(unsigned k=0; k<pointNN; k++){
mas01cr@66 1572 distances[k]=-DBL_MAX;
mas01cr@0 1573 qIndexes[k]=~0;
mas01cr@0 1574 sIndexes[k]=~0;
mas01cr@0 1575 }
mas01mc@18 1576 } // tracks
mas01cr@0 1577 gettimeofday(&tv2, NULL);
mas01cr@0 1578
mas01cr@60 1579 if(verbosity>1) {
mas01mc@18 1580 cerr << endl << "processed tracks :" << processedTracks
mas01cr@0 1581 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01cr@60 1582 }
mas01cr@0 1583
mas01cr@133 1584 if(adbQueryResponse==0){
mas01cr@60 1585 if(verbosity>1) {
mas01cr@0 1586 cerr<<endl;
mas01cr@60 1587 }
mas01cr@0 1588 // Output answer
mas01cr@0 1589 // Loop over nearest neighbours
mas01mc@18 1590 for(k=0; k < min(trackNN,processedTracks); k++)
mas01mc@18 1591 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
mas01mc@18 1592 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 1593 }
mas01cr@0 1594 else{ // Process Web Services Query
mas01mc@18 1595 int listLen = min(trackNN, processedTracks);
mas01cr@133 1596 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 1597 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 1598 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 1599 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 1600 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 1601 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 1602 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 1603 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 1604 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 1605 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 1606 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 1607 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 1608 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 1609 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 1610 }
mas01cr@0 1611 }
mas01cr@0 1612
mas01cr@0 1613
mas01cr@0 1614 // Clean up
mas01mc@18 1615 if(trackOffsetTable)
mas01mc@18 1616 delete trackOffsetTable;
mas01cr@0 1617 if(queryCopy)
mas01cr@0 1618 delete queryCopy;
mas01cr@0 1619 if(qNorm)
mas01cr@0 1620 delete qNorm;
mas01cr@0 1621 if(timesdata)
mas01cr@0 1622 delete timesdata;
mas01cr@0 1623 if(meanDBdur)
mas01cr@0 1624 delete meanDBdur;
mas01cr@0 1625
mas01cr@0 1626 }
mas01cr@0 1627
mas01cr@148 1628 // This is a common pattern in sequence queries: what we are doing is
mas01cr@148 1629 // taking a window of length seqlen over a buffer of length length,
mas01cr@148 1630 // and placing the sum of the elements in that window in the first
mas01cr@148 1631 // element of the window: thus replacing all but the last seqlen
mas01cr@148 1632 // elements in the buffer the corresponding windowed sum.
mas01cr@148 1633 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@148 1634 double tmp1, tmp2, *ps;
mas01cr@148 1635 int j, w;
mas01cr@148 1636
mas01cr@148 1637 tmp1 = *buffer;
mas01cr@148 1638 j = 1;
mas01cr@148 1639 w = seqlen - 1;
mas01cr@148 1640 while(w--) {
mas01cr@148 1641 *buffer += buffer[j++];
mas01cr@148 1642 }
mas01cr@148 1643 ps = buffer + 1;
mas01cr@148 1644 w = length - seqlen; // +1 - 1
mas01cr@148 1645 while(w--) {
mas01cr@148 1646 tmp2 = *ps;
mas01cr@148 1647 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@148 1648 tmp1 = tmp2;
mas01cr@148 1649 ps++;
mas01cr@148 1650 }
mas01cr@148 1651 }
mas01cr@0 1652
mas01cr@157 1653 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@157 1654 int w = length - seqlen + 1;
mas01cr@157 1655 while(w--) {
mas01cr@157 1656 *buffer = sqrt(*buffer);
mas01cr@157 1657 buffer++;
mas01cr@157 1658 }
mas01cr@157 1659 }
mas01cr@157 1660
mas01cr@157 1661 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@157 1662 int w = length - seqlen + 1;
mas01cr@157 1663 while(w--) {
mas01cr@157 1664 *buffer /= seqlen;
mas01cr@157 1665 buffer++;
mas01cr@157 1666 }
mas01cr@157 1667 }
mas01cr@157 1668
mas01mc@20 1669 // k nearest-neighbor (k-NN) search between query and target tracks
mas01mc@20 1670 // efficient implementation based on matched filter
mas01mc@20 1671 // assumes normed shingles
mas01mc@20 1672 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
mas01cr@133 1673 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@0 1674
mas01cr@27 1675 initTables(dbName, 0, inFile);
mas01cr@0 1676
mas01cr@0 1677 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@0 1678 // we use stdout in this stub version
mas01cr@0 1679 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@0 1680 double* query = (double*)(indata+sizeof(int));
mas01cr@0 1681 double* queryCopy = 0;
mas01cr@0 1682
mas01cr@0 1683 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@55 1684 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@55 1685
mas01cr@55 1686 if(numVectors<sequenceLength)
mas01cr@55 1687 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@0 1688
mas01cr@60 1689 if(verbosity>1) {
mas01cr@0 1690 cerr << "performing norms ... "; cerr.flush();
mas01cr@60 1691 }
mas01cr@0 1692 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01mc@20 1693
mas01cr@0 1694 // Make a copy of the query
mas01cr@0 1695 queryCopy = new double[numVectors*dbH->dim];
mas01cr@0 1696 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@0 1697 qNorm = new double[numVectors];
mas01cr@0 1698 sNorm = new double[dbVectors];
mas01cr@154 1699 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01cr@0 1700 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@0 1701 query = queryCopy;
mas01mc@20 1702
mas01cr@0 1703 // Make norm measurements relative to sequenceLength
mas01cr@0 1704 unsigned w = sequenceLength-1;
mas01cr@0 1705 unsigned i,j;
mas01mc@20 1706
mas01cr@0 1707 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@0 1708 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@125 1709 double* qnPtr = qNorm;
mas01cr@0 1710 double* snPtr = sNorm;
mas01cr@149 1711
mas01cr@155 1712 double *sPower = 0, *qPower = 0;
mas01cr@149 1713 double *spPtr = 0, *qpPtr = 0;
mas01cr@149 1714
mas01cr@149 1715 if (usingPower) {
mas01cr@149 1716 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@149 1717 error("database not power-enabled", dbName);
mas01cr@149 1718 }
mas01cr@149 1719 sPower = new double[dbVectors];
mas01cr@149 1720 spPtr = sPower;
mas01cr@149 1721 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@149 1722 }
mas01cr@149 1723
mas01cr@0 1724 for(i=0; i<dbH->numFiles; i++){
mas01cr@148 1725 if(trackTable[i]>=sequenceLength) {
mas01cr@148 1726 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@157 1727 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@157 1728
mas01cr@149 1729 if (usingPower) {
mas01cr@149 1730 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@157 1731 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@149 1732 }
mas01cr@0 1733 }
mas01cr@149 1734 snPtr += trackTable[i];
mas01cr@149 1735 if (usingPower) {
mas01cr@149 1736 spPtr += trackTable[i];
mas01cr@149 1737 }
mas01cr@0 1738 }
mas01cr@0 1739
mas01cr@148 1740 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@157 1741 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@157 1742
mas01cr@149 1743 if (usingPower) {
mas01cr@149 1744 qPower = new double[numVectors];
mas01cr@149 1745 qpPtr = qPower;
mas01cr@149 1746 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@149 1747 error("error seeking to data", powerFileName, "lseek");
mas01cr@149 1748 }
mas01cr@149 1749 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@149 1750 if (count == -1) {
mas01cr@149 1751 error("error reading data", powerFileName, "read");
mas01cr@149 1752 }
mas01cr@149 1753 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@149 1754 error("short read", powerFileName);
mas01cr@149 1755 }
mas01cr@149 1756
mas01cr@149 1757 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@157 1758 sequence_average(qpPtr, numVectors, sequenceLength);
mas01cr@0 1759 }
mas01mc@20 1760
mas01cr@60 1761 if(verbosity>1) {
mas01cr@60 1762 cerr << "done." << endl;
mas01cr@60 1763 }
mas01cr@0 1764
mas01cr@60 1765 if(verbosity>1) {
mas01mc@18 1766 cerr << "matching tracks..." << endl;
mas01cr@60 1767 }
mas01cr@0 1768
mas01cr@0 1769 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 1770 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@0 1771
mas01cr@0 1772 // Make temporary dynamic memory for results
mas01mc@18 1773 double trackDistances[trackNN];
mas01mc@18 1774 unsigned trackIDs[trackNN];
mas01mc@18 1775 unsigned trackQIndexes[trackNN];
mas01mc@18 1776 unsigned trackSIndexes[trackNN];
mas01cr@0 1777
mas01cr@0 1778 double distances[pointNN];
mas01cr@0 1779 unsigned qIndexes[pointNN];
mas01cr@0 1780 unsigned sIndexes[pointNN];
mas01cr@0 1781
mas01cr@0 1782
mas01mc@18 1783 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@0 1784 double thisDist;
mas01cr@0 1785
mas01cr@0 1786 for(k=0; k<pointNN; k++){
mas01mc@20 1787 distances[k]=1.0e6;
mas01cr@0 1788 qIndexes[k]=~0;
mas01cr@0 1789 sIndexes[k]=~0;
mas01cr@0 1790 }
mas01cr@0 1791
mas01mc@18 1792 for(k=0; k<trackNN; k++){
mas01mc@20 1793 trackDistances[k]=1.0e6;
mas01mc@18 1794 trackQIndexes[k]=~0;
mas01mc@18 1795 trackSIndexes[k]=~0;
mas01mc@18 1796 trackIDs[k]=~0;
mas01cr@0 1797 }
mas01cr@0 1798
mas01cr@0 1799 // Timestamp and durations processing
mas01cr@0 1800 double meanQdur = 0;
mas01cr@0 1801 double* timesdata = 0;
mas01cr@0 1802 double* meanDBdur = 0;
mas01cr@0 1803
mas01cr@0 1804 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1805 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01cr@0 1806 usingTimes=0;
mas01cr@0 1807 }
mas01cr@0 1808
mas01cr@0 1809 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@0 1810 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01cr@0 1811
mas01cr@0 1812 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@0 1813 timesdata = new double[numVectors];
mas01cr@0 1814 assert(timesdata);
mas01cr@0 1815 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@0 1816 // Calculate durations of points
mas01cr@0 1817 for(k=0; k<numVectors-1; k++){
mas01cr@0 1818 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01cr@0 1819 meanQdur+=timesdata[k];
mas01cr@0 1820 }
mas01cr@0 1821 meanQdur/=k;
mas01cr@60 1822 if(verbosity>1) {
mas01cr@0 1823 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@60 1824 }
mas01cr@0 1825 meanDBdur = new double[dbH->numFiles];
mas01cr@0 1826 assert(meanDBdur);
mas01cr@0 1827 for(k=0; k<dbH->numFiles; k++){
mas01cr@0 1828 meanDBdur[k]=0.0;
mas01mc@18 1829 for(j=0; j<trackTable[k]-1 ; j++)
mas01cr@0 1830 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01cr@0 1831 meanDBdur[k]/=j;
mas01cr@0 1832 }
mas01cr@0 1833 }
mas01cr@0 1834
mas01cr@0 1835 if(usingQueryPoint)
mas01cr@0 1836 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@0 1837 error("queryPoint > numVectors-wL+1 in query");
mas01cr@0 1838 else{
mas01cr@60 1839 if(verbosity>1) {
mas01cr@0 1840 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 1841 }
mas01cr@156 1842 query = query + queryPoint * dbH->dim;
mas01cr@156 1843 qnPtr = qnPtr + queryPoint;
mas01cr@156 1844 if (usingPower) {
mas01cr@156 1845 qpPtr = qpPtr + queryPoint;
mas01cr@156 1846 }
mas01cr@0 1847 numVectors=wL;
mas01cr@0 1848 }
mas01cr@0 1849
mas01mc@20 1850 double ** D = 0; // Differences query and target
mas01cr@0 1851 double ** DD = 0; // Matched filter distance
mas01cr@0 1852
mas01cr@0 1853 D = new double*[numVectors];
mas01cr@0 1854 assert(D);
mas01cr@0 1855 DD = new double*[numVectors];
mas01cr@0 1856 assert(DD);
mas01cr@0 1857
mas01cr@0 1858 gettimeofday(&tv1, NULL);
mas01cr@154 1859 unsigned processedTracks = 0;
mas01mc@18 1860 unsigned successfulTracks=0;
mas01cr@0 1861
mas01cr@0 1862 double* qp;
mas01cr@0 1863 double* sp;
mas01cr@0 1864 double* dp;
mas01cr@0 1865
mas01mc@18 1866 // build track offset table
mas01mc@18 1867 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 1868 unsigned cumTrack=0;
mas01mc@18 1869 unsigned trackIndexOffset;
mas01cr@0 1870 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 1871 trackOffsetTable[k]=cumTrack;
mas01mc@18 1872 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@0 1873 }
mas01cr@0 1874
mas01cr@0 1875 char nextKey [MAXSTR];
mas01mc@20 1876
mas01mc@20 1877 // chi^2 statistics
mas01mc@20 1878 double sampleCount = 0;
mas01mc@20 1879 double sampleSum = 0;
mas01mc@20 1880 double logSampleSum = 0;
mas01mc@20 1881 double minSample = 1e9;
mas01mc@20 1882 double maxSample = 0;
mas01mc@20 1883
mas01mc@20 1884 // Track loop
mas01mc@18 1885 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@0 1886
mas01mc@18 1887 // get trackID from file if using a control file
mas01mc@18 1888 if(trackFile){
mas01mc@18 1889 if(!trackFile->eof()){
mas01mc@18 1890 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 1891 track=getKeyPos(nextKey);
mas01cr@0 1892 }
mas01cr@0 1893 else
mas01cr@0 1894 break;
mas01cr@0 1895 }
mas01mc@12 1896
mas01mc@18 1897 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 1898 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@0 1899
mas01cr@57 1900 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@0 1901
mas01cr@60 1902 if(verbosity>7) {
mas01mc@18 1903 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 1904 }
mas01cr@0 1905
mas01mc@20 1906 // Sum products matrix
mas01cr@0 1907 for(j=0; j<numVectors;j++){
mas01mc@18 1908 D[j]=new double[trackTable[track]];
mas01cr@0 1909 assert(D[j]);
mas01cr@0 1910
mas01cr@0 1911 }
mas01cr@0 1912
mas01cr@0 1913 // Matched filter matrix
mas01cr@0 1914 for(j=0; j<numVectors;j++){
mas01mc@18 1915 DD[j]=new double[trackTable[track]];
mas01cr@0 1916 assert(DD[j]);
mas01cr@0 1917 }
mas01cr@0 1918
mas01mc@20 1919 // Dot product
mas01cr@0 1920 for(j=0; j<numVectors; j++)
mas01mc@18 1921 for(k=0; k<trackTable[track]; k++){
mas01cr@0 1922 qp=query+j*dbH->dim;
mas01mc@18 1923 sp=dataBuf+trackOffset+k*dbH->dim;
mas01cr@0 1924 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@0 1925 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@0 1926 *dp=0.0; // initialize correlation cell
mas01cr@0 1927 l=dbH->dim; // size of vectors
mas01cr@0 1928 while(l--)
mas01cr@0 1929 *dp+=*qp++**sp++;
mas01cr@0 1930 }
mas01cr@0 1931
mas01cr@0 1932 // Matched Filter
mas01cr@0 1933 // HOP SIZE == 1
mas01cr@0 1934 double* spd;
mas01cr@0 1935 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@0 1936 for(w=0; w<wL; w++)
mas01cr@0 1937 for(j=0; j<numVectors-w; j++){
mas01cr@0 1938 sp=DD[j];
mas01cr@0 1939 spd=D[j+w]+w;
mas01mc@18 1940 k=trackTable[track]-w;
mas01cr@0 1941 while(k--)
mas01cr@0 1942 *sp+++=*spd++;
mas01cr@0 1943 }
mas01cr@0 1944 }
mas01mc@20 1945
mas01cr@0 1946 else{ // HOP_SIZE != 1
mas01cr@0 1947 for(w=0; w<wL; w++)
mas01cr@0 1948 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@0 1949 sp=DD[j];
mas01cr@0 1950 spd=D[j+w]+w;
mas01mc@18 1951 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@0 1952 *sp+=*spd;
mas01cr@0 1953 sp+=HOP_SIZE;
mas01cr@0 1954 spd+=HOP_SIZE;
mas01cr@0 1955 }
mas01cr@0 1956 }
mas01cr@0 1957 }
mas01cr@0 1958
mas01cr@60 1959 if(verbosity>3 && usingTimes) {
mas01mc@18 1960 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01cr@0 1961 cerr.flush();
mas01cr@0 1962 }
mas01cr@0 1963
mas01cr@0 1964 if(!usingTimes ||
mas01cr@0 1965 (usingTimes
mas01mc@18 1966 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@0 1967
mas01cr@60 1968 if(verbosity>3 && usingTimes) {
mas01cr@0 1969 cerr << "within duration tolerance." << endl;
mas01cr@0 1970 cerr.flush();
mas01cr@0 1971 }
mas01cr@0 1972
mas01cr@0 1973 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@53 1974 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@53 1975 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@125 1976 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@154 1977 if(verbosity>9) {
mas01cr@125 1978 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@60 1979 }
mas01mc@20 1980 // Gather chi^2 statistics
mas01mc@20 1981 if(thisDist<minSample)
mas01mc@20 1982 minSample=thisDist;
mas01mc@20 1983 else if(thisDist>maxSample)
mas01mc@20 1984 maxSample=thisDist;
mas01mc@20 1985 if(thisDist>1e-9){
mas01mc@20 1986 sampleCount++;
mas01mc@20 1987 sampleSum+=thisDist;
mas01mc@20 1988 logSampleSum+=log(thisDist);
mas01mc@20 1989 }
mas01mc@20 1990
mas01cr@125 1991 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@0 1992 // Power test
mas01cr@149 1993 if (usingPower) {
mas01cr@149 1994 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@149 1995 thisDist = 1000000.0;
mas01cr@149 1996 }
mas01cr@149 1997 }
mas01cr@149 1998
mas01mc@20 1999 // k-NN match algorithm
mas01cr@58 2000 m=pointNN;
mas01mc@20 2001 while(m--){
mas01mc@20 2002 if(thisDist<=distances[m])
mas01mc@20 2003 if(m==0 || thisDist>=distances[m-1]){
mas01cr@0 2004 // Shuffle distances up the list
mas01cr@0 2005 for(l=pointNN-1; l>m; l--){
mas01cr@0 2006 distances[l]=distances[l-1];
mas01cr@0 2007 qIndexes[l]=qIndexes[l-1];
mas01cr@0 2008 sIndexes[l]=sIndexes[l-1];
mas01cr@0 2009 }
mas01cr@0 2010 distances[m]=thisDist;
mas01cr@0 2011 if(usingQueryPoint)
mas01cr@0 2012 qIndexes[m]=queryPoint;
mas01cr@0 2013 else
mas01cr@0 2014 qIndexes[m]=j;
mas01cr@0 2015 sIndexes[m]=k;
mas01cr@0 2016 break;
mas01mc@20 2017 }
mas01cr@0 2018 }
mas01cr@0 2019 }
mas01cr@0 2020 // Calculate the mean of the N-Best matches
mas01cr@0 2021 thisDist=0.0;
mas01cr@53 2022 for(m=0; m<pointNN; m++) {
mas01cr@53 2023 if (distances[m] == 1000000.0) break;
mas01mc@20 2024 thisDist+=distances[m];
mas01cr@53 2025 }
mas01cr@53 2026 thisDist/=m;
mas01cr@0 2027
mas01mc@12 2028 // Let's see the distances then...
mas01cr@60 2029 if(verbosity>3) {
mas01mc@18 2030 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@60 2031 }
mas01mc@12 2032
mas01mc@20 2033
mas01mc@18 2034 // All the track stuff goes here
mas01cr@58 2035 n=trackNN;
mas01cr@0 2036 while(n--){
mas01mc@20 2037 if(thisDist<=trackDistances[n]){
mas01mc@20 2038 if((n==0 || thisDist>=trackDistances[n-1])){
mas01cr@0 2039 // Copy all values above up the queue
mas01mc@18 2040 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 2041 trackDistances[l]=trackDistances[l-1];
mas01mc@18 2042 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 2043 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 2044 trackIDs[l]=trackIDs[l-1];
mas01cr@0 2045 }
mas01mc@18 2046 trackDistances[n]=thisDist;
mas01mc@18 2047 trackQIndexes[n]=qIndexes[0];
mas01mc@18 2048 trackSIndexes[n]=sIndexes[0];
mas01mc@18 2049 successfulTracks++;
mas01mc@18 2050 trackIDs[n]=track;
mas01cr@0 2051 break;
mas01cr@0 2052 }
mas01cr@0 2053 }
mas01cr@0 2054 else
mas01cr@0 2055 break;
mas01cr@0 2056 }
mas01cr@0 2057 } // Duration match
mas01mc@20 2058
mas01mc@18 2059 // Clean up current track
mas01cr@0 2060 if(D!=NULL){
mas01cr@0 2061 for(j=0; j<numVectors; j++)
mas01cr@0 2062 delete[] D[j];
mas01cr@0 2063 }
mas01cr@0 2064
mas01cr@0 2065 if(DD!=NULL){
mas01cr@0 2066 for(j=0; j<numVectors; j++)
mas01cr@0 2067 delete[] DD[j];
mas01cr@0 2068 }
mas01cr@0 2069 }
mas01mc@20 2070 // per-track reset array values
mas01mc@20 2071 for(unsigned k=0; k<pointNN; k++){
mas01mc@20 2072 distances[k]=1.0e6;
mas01mc@20 2073 qIndexes[k]=~0;
mas01mc@20 2074 sIndexes[k]=~0;
mas01mc@20 2075 }
mas01cr@0 2076 }
mas01cr@0 2077
mas01cr@0 2078 gettimeofday(&tv2,NULL);
mas01cr@60 2079 if(verbosity>1) {
mas01mc@18 2080 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@0 2081 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01mc@20 2082 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01mc@20 2083 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01mc@20 2084 }
mas01cr@133 2085 if(adbQueryResponse==0){
mas01cr@60 2086 if(verbosity>1) {
mas01cr@0 2087 cerr<<endl;
mas01cr@60 2088 }
mas01cr@0 2089 // Output answer
mas01cr@0 2090 // Loop over nearest neighbours
mas01mc@18 2091 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01mc@20 2092 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
mas01mc@20 2093 << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
mas01cr@0 2094 }
mas01cr@0 2095 else{ // Process Web Services Query
mas01mc@18 2096 int listLen = min(trackNN, processedTracks);
mas01cr@133 2097 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 2098 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 2099 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 2100 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 2101 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 2102 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 2103 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 2104 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 2105 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 2106 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 2107 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 2108 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 2109 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 2110 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@0 2111 }
mas01cr@0 2112 }
mas01cr@0 2113
mas01cr@0 2114 // Clean up
mas01mc@18 2115 if(trackOffsetTable)
mas01mc@20 2116 delete[] trackOffsetTable;
mas01cr@0 2117 if(queryCopy)
mas01mc@20 2118 delete[] queryCopy;
mas01cr@121 2119 if(qNorm)
mas01cr@121 2120 delete[] qNorm;
mas01cr@121 2121 if(sNorm)
mas01cr@121 2122 delete[] sNorm;
mas01cr@155 2123 if(qPower)
mas01cr@155 2124 delete[] qPower;
mas01cr@155 2125 if(sPower)
mas01cr@155 2126 delete[] sPower;
mas01cr@0 2127 if(D)
mas01cr@0 2128 delete[] D;
mas01cr@0 2129 if(DD)
mas01cr@0 2130 delete[] DD;
mas01cr@0 2131 if(timesdata)
mas01mc@20 2132 delete[] timesdata;
mas01cr@0 2133 if(meanDBdur)
mas01mc@20 2134 delete[] meanDBdur;
mas01cr@0 2135 }
mas01cr@0 2136
mas01mc@20 2137 // Radius search between query and target tracks
mas01mc@20 2138 // efficient implementation based on matched filter
mas01mc@20 2139 // assumes normed shingles
mas01mc@20 2140 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
mas01cr@133 2141 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01mc@17 2142
mas01cr@27 2143 initTables(dbName, 0, inFile);
mas01mc@17 2144
mas01mc@17 2145 // For each input vector, find the closest pointNN matching output vectors and report
mas01mc@17 2146 // we use stdout in this stub version
mas01mc@17 2147 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01mc@17 2148 double* query = (double*)(indata+sizeof(int));
mas01mc@17 2149 double* queryCopy = 0;
mas01mc@17 2150
mas01mc@17 2151 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01mc@17 2152 error("Database must be L2 normed for sequence query","use -l2norm");
mas01mc@17 2153
mas01cr@60 2154 if(verbosity>1) {
mas01mc@17 2155 cerr << "performing norms ... "; cerr.flush();
mas01cr@60 2156 }
mas01mc@17 2157 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01mc@18 2158
mas01mc@17 2159 // Make a copy of the query
mas01mc@17 2160 queryCopy = new double[numVectors*dbH->dim];
mas01mc@17 2161 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01mc@17 2162 qNorm = new double[numVectors];
mas01mc@17 2163 sNorm = new double[dbVectors];
mas01cr@155 2164 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01mc@17 2165 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01mc@17 2166 query = queryCopy;
mas01mc@18 2167
mas01mc@17 2168 // Make norm measurements relative to sequenceLength
mas01mc@17 2169 unsigned w = sequenceLength-1;
mas01mc@17 2170 unsigned i,j;
mas01mc@18 2171
mas01mc@17 2172 // Copy the L2 norm values to core to avoid disk random access later on
mas01mc@17 2173 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01mc@17 2174 double* snPtr = sNorm;
mas01cr@125 2175 double* qnPtr = qNorm;
mas01cr@155 2176
mas01cr@155 2177 double *sPower = 0, *qPower = 0;
mas01cr@155 2178 double *spPtr = 0, *qpPtr = 0;
mas01cr@155 2179
mas01cr@155 2180 if (usingPower) {
mas01cr@155 2181 if(!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@155 2182 error("database not power-enabled", dbName);
mas01cr@155 2183 }
mas01cr@155 2184 sPower = new double[dbVectors];
mas01cr@155 2185 spPtr = sPower;
mas01cr@155 2186 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@155 2187 }
mas01cr@155 2188
mas01mc@17 2189 for(i=0; i<dbH->numFiles; i++){
mas01cr@155 2190 if(trackTable[i]>=sequenceLength) {
mas01cr@148 2191 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@157 2192 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@155 2193 if (usingPower) {
mas01cr@155 2194 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@157 2195 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@155 2196 }
mas01mc@17 2197 }
mas01cr@155 2198 snPtr += trackTable[i];
mas01cr@155 2199 if (usingPower) {
mas01cr@155 2200 spPtr += trackTable[i];
mas01cr@155 2201 }
mas01mc@17 2202 }
mas01mc@17 2203
mas01cr@155 2204 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@157 2205 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@157 2206
mas01cr@155 2207 if (usingPower) {
mas01cr@155 2208 qPower = new double[numVectors];
mas01cr@155 2209 qpPtr = qPower;
mas01cr@155 2210 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@155 2211 error("error seeking to data", powerFileName, "lseek");
mas01mc@17 2212 }
mas01cr@155 2213 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@155 2214 if (count == -1) {
mas01cr@155 2215 error("error reading data", powerFileName, "read");
mas01cr@155 2216 }
mas01cr@155 2217 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@155 2218 error("short read", powerFileName);
mas01cr@155 2219 }
mas01cr@155 2220
mas01cr@155 2221 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@157 2222 sequence_average(qpPtr, numVectors, sequenceLength);
mas01mc@17 2223 }
mas01mc@17 2224
mas01cr@60 2225 if(verbosity>1) {
mas01mc@17 2226 cerr << "done." << endl;
mas01cr@60 2227 }
mas01mc@17 2228
mas01cr@60 2229 if(verbosity>1) {
mas01mc@18 2230 cerr << "matching tracks..." << endl;
mas01cr@60 2231 }
mas01mc@17 2232
mas01mc@17 2233 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@18 2234 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@17 2235
mas01mc@17 2236 // Make temporary dynamic memory for results
mas01mc@18 2237 double trackDistances[trackNN];
mas01mc@18 2238 unsigned trackIDs[trackNN];
mas01mc@18 2239 unsigned trackQIndexes[trackNN];
mas01mc@18 2240 unsigned trackSIndexes[trackNN];
mas01mc@17 2241
mas01mc@17 2242 double distances[pointNN];
mas01mc@17 2243 unsigned qIndexes[pointNN];
mas01mc@17 2244 unsigned sIndexes[pointNN];
mas01mc@17 2245
mas01mc@17 2246
mas01cr@59 2247 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01mc@17 2248 double thisDist;
mas01mc@17 2249
mas01mc@17 2250 for(k=0; k<pointNN; k++){
mas01mc@17 2251 distances[k]=0.0;
mas01mc@17 2252 qIndexes[k]=~0;
mas01mc@17 2253 sIndexes[k]=~0;
mas01mc@17 2254 }
mas01mc@17 2255
mas01mc@18 2256 for(k=0; k<trackNN; k++){
mas01mc@18 2257 trackDistances[k]=0.0;
mas01mc@18 2258 trackQIndexes[k]=~0;
mas01mc@18 2259 trackSIndexes[k]=~0;
mas01mc@18 2260 trackIDs[k]=~0;
mas01mc@17 2261 }
mas01mc@17 2262
mas01mc@17 2263 // Timestamp and durations processing
mas01mc@17 2264 double meanQdur = 0;
mas01mc@17 2265 double* timesdata = 0;
mas01mc@17 2266 double* meanDBdur = 0;
mas01mc@17 2267
mas01mc@17 2268 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01mc@17 2269 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
mas01mc@17 2270 usingTimes=0;
mas01mc@17 2271 }
mas01mc@17 2272
mas01mc@17 2273 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01mc@17 2274 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
mas01mc@17 2275
mas01mc@17 2276 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01mc@17 2277 timesdata = new double[numVectors];
mas01mc@17 2278 assert(timesdata);
mas01mc@17 2279 insertTimeStamps(numVectors, timesFile, timesdata);
mas01mc@17 2280 // Calculate durations of points
mas01mc@17 2281 for(k=0; k<numVectors-1; k++){
mas01mc@17 2282 timesdata[k]=timesdata[k+1]-timesdata[k];
mas01mc@17 2283 meanQdur+=timesdata[k];
mas01mc@17 2284 }
mas01mc@17 2285 meanQdur/=k;
mas01cr@60 2286 if(verbosity>1) {
mas01mc@17 2287 cerr << "mean query file duration: " << meanQdur << endl;
mas01cr@60 2288 }
mas01mc@17 2289 meanDBdur = new double[dbH->numFiles];
mas01mc@17 2290 assert(meanDBdur);
mas01mc@17 2291 for(k=0; k<dbH->numFiles; k++){
mas01mc@17 2292 meanDBdur[k]=0.0;
mas01mc@18 2293 for(j=0; j<trackTable[k]-1 ; j++)
mas01mc@17 2294 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
mas01mc@17 2295 meanDBdur[k]/=j;
mas01mc@17 2296 }
mas01mc@17 2297 }
mas01mc@17 2298
mas01mc@17 2299 if(usingQueryPoint)
mas01mc@17 2300 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01mc@17 2301 error("queryPoint > numVectors-wL+1 in query");
mas01mc@17 2302 else{
mas01cr@60 2303 if(verbosity>1) {
mas01mc@17 2304 cerr << "query point: " << queryPoint << endl; cerr.flush();
mas01cr@60 2305 }
mas01cr@156 2306 query = query + queryPoint*dbH->dim;
mas01cr@156 2307 qnPtr = qnPtr + queryPoint;
mas01cr@156 2308 if (usingPower) {
mas01cr@156 2309 qpPtr = qpPtr + queryPoint;
mas01cr@156 2310 }
mas01mc@17 2311 numVectors=wL;
mas01mc@17 2312 }
mas01mc@17 2313
mas01mc@17 2314 double ** D = 0; // Differences query and target
mas01mc@17 2315 double ** DD = 0; // Matched filter distance
mas01mc@17 2316
mas01mc@17 2317 D = new double*[numVectors];
mas01mc@17 2318 assert(D);
mas01mc@17 2319 DD = new double*[numVectors];
mas01mc@17 2320 assert(DD);
mas01mc@17 2321
mas01mc@17 2322 gettimeofday(&tv1, NULL);
mas01cr@155 2323 unsigned processedTracks = 0;
mas01mc@18 2324 unsigned successfulTracks=0;
mas01mc@17 2325
mas01mc@17 2326 double* qp;
mas01mc@17 2327 double* sp;
mas01mc@17 2328 double* dp;
mas01mc@17 2329
mas01mc@18 2330 // build track offset table
mas01mc@18 2331 unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
mas01mc@18 2332 unsigned cumTrack=0;
mas01mc@18 2333 unsigned trackIndexOffset;
mas01mc@17 2334 for(k=0; k<dbH->numFiles;k++){
mas01mc@18 2335 trackOffsetTable[k]=cumTrack;
mas01mc@18 2336 cumTrack+=trackTable[k]*dbH->dim;
mas01mc@17 2337 }
mas01mc@17 2338
mas01mc@17 2339 char nextKey [MAXSTR];
mas01mc@17 2340
mas01mc@17 2341 // chi^2 statistics
mas01mc@17 2342 double sampleCount = 0;
mas01mc@17 2343 double sampleSum = 0;
mas01mc@17 2344 double logSampleSum = 0;
mas01mc@17 2345 double minSample = 1e9;
mas01mc@17 2346 double maxSample = 0;
mas01mc@17 2347
mas01mc@17 2348 // Track loop
mas01mc@18 2349 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01mc@17 2350
mas01mc@18 2351 // get trackID from file if using a control file
mas01mc@18 2352 if(trackFile){
mas01mc@18 2353 if(!trackFile->eof()){
mas01mc@18 2354 trackFile->getline(nextKey,MAXSTR);
mas01mc@18 2355 track=getKeyPos(nextKey);
mas01mc@17 2356 }
mas01mc@17 2357 else
mas01mc@17 2358 break;
mas01mc@17 2359 }
mas01mc@17 2360
mas01mc@18 2361 trackOffset=trackOffsetTable[track]; // numDoubles offset
mas01mc@18 2362 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01mc@17 2363
mas01cr@70 2364 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01mc@17 2365
mas01cr@60 2366 if(verbosity>7) {
mas01mc@18 2367 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
mas01cr@60 2368 }
mas01cr@60 2369
mas01mc@17 2370 // Sum products matrix
mas01mc@17 2371 for(j=0; j<numVectors;j++){
mas01mc@18 2372 D[j]=new double[trackTable[track]];
mas01mc@17 2373 assert(D[j]);
mas01mc@17 2374
mas01mc@17 2375 }
mas01mc@17 2376
mas01mc@17 2377 // Matched filter matrix
mas01mc@17 2378 for(j=0; j<numVectors;j++){
mas01mc@18 2379 DD[j]=new double[trackTable[track]];
mas01mc@17 2380 assert(DD[j]);
mas01mc@17 2381 }
mas01mc@17 2382
mas01mc@17 2383 // Dot product
mas01mc@17 2384 for(j=0; j<numVectors; j++)
mas01mc@18 2385 for(k=0; k<trackTable[track]; k++){
mas01mc@17 2386 qp=query+j*dbH->dim;
mas01mc@18 2387 sp=dataBuf+trackOffset+k*dbH->dim;
mas01mc@17 2388 DD[j][k]=0.0; // Initialize matched filter array
mas01mc@17 2389 dp=&D[j][k]; // point to correlation cell j,k
mas01mc@17 2390 *dp=0.0; // initialize correlation cell
mas01mc@17 2391 l=dbH->dim; // size of vectors
mas01mc@17 2392 while(l--)
mas01mc@17 2393 *dp+=*qp++**sp++;
mas01mc@17 2394 }
mas01mc@17 2395
mas01mc@17 2396 // Matched Filter
mas01mc@17 2397 // HOP SIZE == 1
mas01mc@17 2398 double* spd;
mas01mc@17 2399 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01mc@17 2400 for(w=0; w<wL; w++)
mas01mc@17 2401 for(j=0; j<numVectors-w; j++){
mas01mc@17 2402 sp=DD[j];
mas01mc@17 2403 spd=D[j+w]+w;
mas01mc@18 2404 k=trackTable[track]-w;
mas01mc@17 2405 while(k--)
mas01mc@17 2406 *sp+++=*spd++;
mas01mc@17 2407 }
mas01mc@17 2408 }
mas01mc@17 2409
mas01mc@17 2410 else{ // HOP_SIZE != 1
mas01mc@17 2411 for(w=0; w<wL; w++)
mas01mc@17 2412 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01mc@17 2413 sp=DD[j];
mas01mc@17 2414 spd=D[j+w]+w;
mas01mc@18 2415 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01mc@17 2416 *sp+=*spd;
mas01mc@17 2417 sp+=HOP_SIZE;
mas01mc@17 2418 spd+=HOP_SIZE;
mas01mc@17 2419 }
mas01mc@17 2420 }
mas01mc@17 2421 }
mas01mc@17 2422
mas01cr@60 2423 if(verbosity>3 && usingTimes) {
mas01mc@18 2424 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
mas01mc@17 2425 cerr.flush();
mas01mc@17 2426 }
mas01mc@17 2427
mas01mc@17 2428 if(!usingTimes ||
mas01mc@17 2429 (usingTimes
mas01mc@18 2430 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01mc@17 2431
mas01cr@60 2432 if(verbosity>3 && usingTimes) {
mas01mc@17 2433 cerr << "within duration tolerance." << endl;
mas01mc@17 2434 cerr.flush();
mas01mc@17 2435 }
mas01mc@17 2436
mas01mc@17 2437 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@70 2438 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@70 2439 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@125 2440 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@155 2441 if(verbosity>9) {
mas01cr@125 2442 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
mas01cr@60 2443 }
mas01mc@17 2444 // Gather chi^2 statistics
mas01mc@17 2445 if(thisDist<minSample)
mas01mc@17 2446 minSample=thisDist;
mas01mc@17 2447 else if(thisDist>maxSample)
mas01mc@17 2448 maxSample=thisDist;
mas01mc@17 2449 if(thisDist>1e-9){
mas01mc@17 2450 sampleCount++;
mas01mc@17 2451 sampleSum+=thisDist;
mas01mc@17 2452 logSampleSum+=log(thisDist);
mas01mc@17 2453 }
mas01mc@17 2454
mas01cr@125 2455 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01mc@17 2456 // Power test
mas01cr@155 2457 if (usingPower) {
mas01cr@155 2458 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@155 2459 thisDist = 1000000.0;
mas01cr@155 2460 }
mas01cr@155 2461 }
mas01cr@155 2462
mas01mc@17 2463 if(thisDist>=0 && thisDist<=radius){
mas01mc@17 2464 distances[0]++; // increment count
mas01mc@18 2465 break; // only need one track point per query point
mas01mc@17 2466 }
mas01mc@17 2467 }
mas01mc@17 2468 // How many points were below threshold ?
mas01mc@17 2469 thisDist=distances[0];
mas01mc@17 2470
mas01mc@17 2471 // Let's see the distances then...
mas01cr@60 2472 if(verbosity>3) {
mas01mc@18 2473 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
mas01cr@60 2474 }
mas01mc@17 2475
mas01mc@18 2476 // All the track stuff goes here
mas01mc@18 2477 n=trackNN;
mas01mc@17 2478 while(n--){
mas01mc@18 2479 if(thisDist>trackDistances[n]){
mas01mc@18 2480 if((n==0 || thisDist<=trackDistances[n-1])){
mas01mc@17 2481 // Copy all values above up the queue
mas01mc@18 2482 for( l=trackNN-1 ; l > n ; l--){
mas01mc@18 2483 trackDistances[l]=trackDistances[l-1];
mas01mc@18 2484 trackQIndexes[l]=trackQIndexes[l-1];
mas01mc@18 2485 trackSIndexes[l]=trackSIndexes[l-1];
mas01mc@18 2486 trackIDs[l]=trackIDs[l-1];
mas01mc@17 2487 }
mas01mc@18 2488 trackDistances[n]=thisDist;
mas01mc@18 2489 trackQIndexes[n]=qIndexes[0];
mas01mc@18 2490 trackSIndexes[n]=sIndexes[0];
mas01mc@18 2491 successfulTracks++;
mas01mc@18 2492 trackIDs[n]=track;
mas01mc@17 2493 break;
mas01mc@17 2494 }
mas01mc@17 2495 }
mas01mc@17 2496 else
mas01mc@17 2497 break;
mas01mc@17 2498 }
mas01mc@17 2499 } // Duration match
mas01mc@17 2500
mas01mc@18 2501 // Clean up current track
mas01mc@17 2502 if(D!=NULL){
mas01mc@17 2503 for(j=0; j<numVectors; j++)
mas01mc@17 2504 delete[] D[j];
mas01mc@17 2505 }
mas01mc@17 2506
mas01mc@17 2507 if(DD!=NULL){
mas01mc@17 2508 for(j=0; j<numVectors; j++)
mas01mc@17 2509 delete[] DD[j];
mas01mc@17 2510 }
mas01mc@17 2511 }
mas01mc@18 2512 // per-track reset array values
mas01mc@17 2513 for(unsigned k=0; k<pointNN; k++){
mas01mc@17 2514 distances[k]=0.0;
mas01mc@17 2515 qIndexes[k]=~0;
mas01mc@17 2516 sIndexes[k]=~0;
mas01mc@17 2517 }
mas01mc@17 2518 }
mas01mc@17 2519
mas01mc@17 2520 gettimeofday(&tv2,NULL);
mas01cr@60 2521 if(verbosity>1) {
mas01mc@18 2522 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01mc@17 2523 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
mas01mc@17 2524 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01mc@17 2525 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
mas01mc@17 2526 }
mas01mc@17 2527
mas01cr@133 2528 if(adbQueryResponse==0){
mas01cr@60 2529 if(verbosity>1) {
mas01mc@17 2530 cerr<<endl;
mas01cr@60 2531 }
mas01mc@17 2532 // Output answer
mas01mc@17 2533 // Loop over nearest neighbours
mas01mc@18 2534 for(k=0; k < min(trackNN,successfulTracks); k++)
mas01mc@18 2535 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
mas01mc@17 2536 }
mas01mc@17 2537 else{ // Process Web Services Query
mas01mc@18 2538 int listLen = min(trackNN, processedTracks);
mas01cr@133 2539 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@133 2540 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@133 2541 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@133 2542 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@133 2543 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@133 2544 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@133 2545 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@133 2546 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@133 2547 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@133 2548 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@133 2549 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@133 2550 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@133 2551 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@133 2552 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01mc@17 2553 }
mas01mc@17 2554 }
mas01mc@17 2555
mas01mc@17 2556 // Clean up
mas01mc@18 2557 if(trackOffsetTable)
mas01mc@18 2558 delete[] trackOffsetTable;
mas01mc@17 2559 if(queryCopy)
mas01mc@17 2560 delete[] queryCopy;
mas01cr@121 2561 if(qNorm)
mas01cr@121 2562 delete[] qNorm;
mas01cr@121 2563 if(sNorm)
mas01cr@121 2564 delete[] sNorm;
mas01cr@155 2565 if(qPower)
mas01cr@155 2566 delete[] qPower;
mas01cr@155 2567 if(sPower)
mas01cr@155 2568 delete[] sPower;
mas01mc@17 2569 if(D)
mas01mc@17 2570 delete[] D;
mas01mc@17 2571 if(DD)
mas01mc@17 2572 delete[] DD;
mas01mc@17 2573 if(timesdata)
mas01mc@17 2574 delete[] timesdata;
mas01mc@17 2575 if(meanDBdur)
mas01mc@17 2576 delete[] meanDBdur;
mas01mc@17 2577 }
mas01mc@17 2578
mas01cr@0 2579 // Unit norm block of features
mas01cr@0 2580 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@0 2581 unsigned d;
mas01cr@59 2582 double L2, *p;
mas01cr@60 2583 if(verbosity>2) {
mas01cr@0 2584 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@60 2585 }
mas01cr@0 2586 while(n--){
mas01cr@0 2587 p=X;
mas01cr@0 2588 L2=0.0;
mas01cr@0 2589 d=dim;
mas01cr@0 2590 while(d--){
mas01cr@0 2591 L2+=*p**p;
mas01cr@0 2592 p++;
mas01cr@0 2593 }
mas01mc@17 2594 /* L2=sqrt(L2);*/
mas01cr@0 2595 if(qNorm)
mas01cr@0 2596 *qNorm++=L2;
mas01mc@17 2597 /*
mas01cr@0 2598 oneOverL2 = 1.0/L2;
mas01cr@0 2599 d=dim;
mas01cr@0 2600 while(d--){
mas01cr@0 2601 *X*=oneOverL2;
mas01cr@0 2602 X++;
mas01mc@17 2603 */
mas01mc@17 2604 X+=dim;
mas01cr@0 2605 }
mas01cr@60 2606 if(verbosity>2) {
mas01cr@0 2607 cerr << "done..." << endl;
mas01cr@60 2608 }
mas01cr@0 2609 }
mas01cr@0 2610
mas01cr@0 2611 // Unit norm block of features
mas01cr@0 2612 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 2613 unsigned d;
mas01cr@59 2614 double *p;
mas01cr@0 2615 unsigned nn = n;
mas01cr@0 2616
mas01cr@0 2617 assert(l2normTable);
mas01cr@0 2618
mas01cr@0 2619 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 2620 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 2621
mas01cr@60 2622 if(verbosity>2) {
mas01cr@0 2623 cerr << "norming " << n << " vectors...";cerr.flush();
mas01cr@60 2624 }
mas01cr@0 2625
mas01cr@0 2626 double* l2buf = new double[n];
mas01cr@0 2627 double* l2ptr = l2buf;
mas01cr@0 2628 assert(l2buf);
mas01cr@0 2629 assert(X);
mas01cr@0 2630
mas01cr@0 2631 while(nn--){
mas01cr@0 2632 p=X;
mas01cr@0 2633 *l2ptr=0.0;
mas01cr@0 2634 d=dim;
mas01cr@0 2635 while(d--){
mas01cr@0 2636 *l2ptr+=*p**p;
mas01cr@0 2637 p++;
mas01cr@0 2638 }
mas01mc@17 2639 l2ptr++;
mas01mc@17 2640 /*
mas01mc@17 2641 oneOverL2 = 1.0/(*l2ptr++);
mas01mc@17 2642 d=dim;
mas01mc@17 2643 while(d--){
mas01cr@0 2644 *X*=oneOverL2;
mas01cr@0 2645 X++;
mas01mc@17 2646 }
mas01mc@17 2647 */
mas01mc@17 2648 X+=dim;
mas01cr@0 2649 }
mas01cr@0 2650 unsigned offset;
mas01cr@84 2651 if(append) {
mas01cr@84 2652 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 2653 // been inserted, and dbH->length has already been updated. We
mas01cr@84 2654 // need to subtract off again the number of vectors that we've
mas01cr@84 2655 // inserted this time...
mas01cr@84 2656 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 2657 } else {
mas01cr@0 2658 offset=0;
mas01cr@84 2659 }
mas01cr@0 2660 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 2661 if(l2buf)
mas01mc@17 2662 delete[] l2buf;
mas01cr@60 2663 if(verbosity>2) {
mas01cr@0 2664 cerr << "done..." << endl;
mas01cr@60 2665 }
mas01cr@0 2666 }
mas01cr@0 2667
mas01cr@0 2668
mas01cr@0 2669 // Start an audioDB server on the host
mas01cr@0 2670 void audioDB::startServer(){
mas01cr@0 2671 struct soap soap;
mas01cr@0 2672 int m, s; // master and slave sockets
mas01cr@0 2673 soap_init(&soap);
mas01cr@92 2674 // FIXME: largely this use of SO_REUSEADDR is to make writing (and
mas01cr@92 2675 // running) test cases more convenient, so that multiple test runs
mas01cr@92 2676 // in close succession don't fail because of a bin() error.
mas01cr@92 2677 // Investigate whether there are any potential drawbacks in this,
mas01cr@92 2678 // and also whether there's a better way to write the tests. --
mas01cr@92 2679 // CSR, 2007-10-03
mas01cr@92 2680 soap.bind_flags |= SO_REUSEADDR;
mas01cr@0 2681 m = soap_bind(&soap, NULL, port, 100);
mas01cr@0 2682 if (m < 0)
mas01cr@0 2683 soap_print_fault(&soap, stderr);
mas01cr@0 2684 else
mas01cr@0 2685 {
mas01cr@0 2686 fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
mas01cr@0 2687 for (int i = 1; ; i++)
mas01cr@0 2688 {
mas01cr@0 2689 s = soap_accept(&soap);
mas01cr@0 2690 if (s < 0)
mas01cr@0 2691 {
mas01cr@0 2692 soap_print_fault(&soap, stderr);
mas01cr@0 2693 break;
mas01cr@0 2694 }
mas01cr@75 2695 fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i,
mas01cr@0 2696 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
mas01cr@0 2697 if (soap_serve(&soap) != SOAP_OK) // process RPC request
mas01cr@0 2698 soap_print_fault(&soap, stderr); // print error
mas01cr@0 2699 fprintf(stderr, "request served\n");
mas01cr@0 2700 soap_destroy(&soap); // clean up class instances
mas01cr@0 2701 soap_end(&soap); // clean up everything and close socket
mas01cr@0 2702 }
mas01cr@0 2703 }
mas01cr@0 2704 soap_done(&soap); // close master socket and detach environment
mas01cr@0 2705 }
mas01cr@0 2706
mas01cr@0 2707
mas01cr@0 2708 // web services
mas01cr@0 2709
mas01cr@0 2710 // SERVER SIDE
mas01cr@133 2711 int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){
mas01cr@21 2712 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
mas01cr@21 2713 const unsigned argc = 4;
mas01cr@77 2714 try {
mas01cr@133 2715 audioDB(argc, argv, &adbStatusResponse);
mas01cr@77 2716 return SOAP_OK;
mas01cr@77 2717 } catch(char *err) {
mas01cr@77 2718 soap_receiver_fault(soap, err, "");
mas01cr@77 2719 return SOAP_FAULT;
mas01cr@77 2720 }
mas01cr@0 2721 }
mas01cr@0 2722
mas01cr@0 2723 // Literal translation of command line to web service
mas01cr@0 2724
mas01cr@133 2725 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){
mas01cr@0 2726 char queryType[256];
mas01cr@0 2727 for(int k=0; k<256; k++)
mas01cr@0 2728 queryType[k]='\0';
mas01cr@105 2729 if(qType == O2_POINT_QUERY)
mas01cr@0 2730 strncpy(queryType, "point", strlen("point"));
mas01cr@105 2731 else if (qType == O2_SEQUENCE_QUERY)
mas01cr@0 2732 strncpy(queryType, "sequence", strlen("sequence"));
mas01cr@105 2733 else if(qType == O2_TRACK_QUERY)
mas01mc@18 2734 strncpy(queryType,"track", strlen("track"));
mas01cr@0 2735 else
mas01cr@0 2736 strncpy(queryType, "", strlen(""));
mas01cr@0 2737
mas01cr@0 2738 if(pointNN==0)
mas01cr@0 2739 pointNN=10;
mas01mc@18 2740 if(trackNN==0)
mas01mc@18 2741 trackNN=10;
mas01cr@0 2742 if(seqLen==0)
mas01cr@0 2743 seqLen=16;
mas01cr@0 2744
mas01cr@0 2745 char qPosStr[256];
mas01cr@0 2746 sprintf(qPosStr, "%d", qPos);
mas01cr@0 2747 char pointNNStr[256];
mas01cr@0 2748 sprintf(pointNNStr,"%d",pointNN);
mas01mc@18 2749 char trackNNStr[256];
mas01mc@18 2750 sprintf(trackNNStr,"%d",trackNN);
mas01cr@0 2751 char seqLenStr[256];
mas01cr@0 2752 sprintf(seqLenStr,"%d",seqLen);
mas01cr@0 2753
mas01cr@0 2754 const char* argv[] ={
mas01cr@0 2755 "./audioDB",
mas01cr@0 2756 COM_QUERY,
mas01cr@0 2757 queryType, // Need to pass a parameter
mas01cr@0 2758 COM_DATABASE,
mas01cr@0 2759 dbName,
mas01cr@0 2760 COM_FEATURES,
mas01cr@0 2761 qKey,
mas01cr@0 2762 COM_KEYLIST,
mas01cr@0 2763 keyList==0?"":keyList,
mas01cr@0 2764 COM_TIMES,
mas01cr@0 2765 timesFileName==0?"":timesFileName,
mas01cr@0 2766 COM_QPOINT,
mas01cr@0 2767 qPosStr,
mas01cr@0 2768 COM_POINTNN,
mas01cr@0 2769 pointNNStr,
mas01mc@18 2770 COM_TRACKNN,
mas01mc@18 2771 trackNNStr, // Need to pass a parameter
mas01cr@0 2772 COM_SEQLEN,
mas01cr@0 2773 seqLenStr
mas01cr@0 2774 };
mas01cr@0 2775
mas01cr@0 2776 const unsigned argc = 19;
mas01cr@79 2777 try {
mas01cr@133 2778 audioDB(argc, (char* const*)argv, &adbQueryResponse);
mas01cr@79 2779 return SOAP_OK;
mas01cr@79 2780 } catch (char *err) {
mas01cr@79 2781 soap_receiver_fault(soap, err, "");
mas01cr@79 2782 return SOAP_FAULT;
mas01cr@79 2783 }
mas01cr@0 2784 }
mas01cr@0 2785
mas01cr@171 2786 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey,
mas01cr@171 2787 adb__sequenceQueryParms *parms,
mas01cr@171 2788 adb__queryResponse &adbQueryResponse) {
mas01cr@171 2789
mas01cr@171 2790 char qPosStr[256];
mas01cr@171 2791 char pointNNStr[256];
mas01cr@171 2792 char trackNNStr[256];
mas01cr@171 2793 char seqLenStr[256];
mas01cr@171 2794 char relative_thresholdStr[256];
mas01cr@171 2795 char absolute_thresholdStr[256];
mas01cr@171 2796
mas01cr@171 2797 /* When the branch is merged, move this to a header and use it
mas01cr@171 2798 elsewhere */
mas01cr@171 2799 #define INTSTRINGIFY(val, str) \
mas01cr@171 2800 snprintf(str, 256, "%d", val);
mas01cr@171 2801 #define DOUBLESTRINGIFY(val, str) \
mas01cr@171 2802 snprintf(str, 256, "%f", val);
mas01cr@171 2803
mas01cr@171 2804 INTSTRINGIFY(parms->qPos, qPosStr);
mas01cr@171 2805 INTSTRINGIFY(parms->pointNN, pointNNStr);
mas01cr@171 2806 INTSTRINGIFY(parms->segNN, trackNNStr);
mas01cr@171 2807 /* FIXME: decide which of segLen and seqLen should live */
mas01cr@171 2808 INTSTRINGIFY(parms->segLen, seqLenStr);
mas01cr@171 2809
mas01cr@171 2810 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr);
mas01cr@171 2811 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr);
mas01cr@171 2812
mas01cr@171 2813 const char *argv[] = {
mas01cr@171 2814 "./audioDB",
mas01cr@171 2815 COM_QUERY,
mas01cr@171 2816 "sequence",
mas01cr@171 2817 COM_DATABASE,
mas01cr@171 2818 dbName,
mas01cr@171 2819 COM_FEATURES,
mas01cr@171 2820 qKey,
mas01cr@171 2821 COM_KEYLIST,
mas01cr@171 2822 /* FIXME: when this branch is merged, use ENSURE_STRING */
mas01cr@171 2823 parms->keyList==0?"":parms->keyList,
mas01cr@171 2824 COM_TIMES,
mas01cr@171 2825 parms->timesFileName==0?"":parms->timesFileName,
mas01cr@171 2826 COM_QUERYPOWER,
mas01cr@171 2827 parms->powerFileName==0?"":parms->powerFileName,
mas01cr@171 2828 COM_QPOINT,
mas01cr@171 2829 qPosStr,
mas01cr@171 2830 COM_POINTNN,
mas01cr@171 2831 pointNNStr,
mas01cr@171 2832 COM_TRACKNN,
mas01cr@171 2833 trackNNStr,
mas01cr@171 2834 COM_SEQLEN,
mas01cr@171 2835 seqLenStr,
mas01cr@171 2836 COM_RELATIVE_THRESH,
mas01cr@171 2837 relative_thresholdStr,
mas01cr@171 2838 COM_ABSOLUTE_THRESH,
mas01cr@171 2839 absolute_thresholdStr
mas01cr@171 2840 };
mas01cr@171 2841
mas01cr@171 2842 const unsigned argc = 25;
mas01cr@171 2843
mas01cr@171 2844 try {
mas01cr@171 2845 audioDB(argc, (char* const*)argv, &adbQueryResponse);
mas01cr@171 2846 return SOAP_OK;
mas01cr@171 2847 } catch (char *err) {
mas01cr@171 2848 soap_receiver_fault(soap, err, "");
mas01cr@171 2849 return SOAP_FAULT;
mas01cr@171 2850 }
mas01cr@171 2851 }
mas01cr@171 2852
mas01cr@0 2853 int main(const unsigned argc, char* const argv[]){
mas01cr@0 2854 audioDB(argc, argv);
mas01cr@0 2855 }