Mercurial > hg > audiodb
comparison query.cpp @ 437:9a065b8db769 api-inversion
Hey, who let audioDB::set_up_query into the room?
audioDB::set_up_query and audioDB::set_up_db both do all sorts of
horrible pointer manipulation. Define a structure to contain all the
pointers, rather than having the hideous argument lists full of double
**, and use it in audioDB::set_up_query and
audioDB::set_up_query_from_key. (Those two functions are desperately
close to becoming one function, incidentally, or possibly even no
functions given the existence of adb_datum_t...)
author | mas01cr |
---|---|
date | Wed, 24 Dec 2008 10:56:07 +0000 |
parents | e43f8a7aca93 |
children | 8c1d8a40db91 |
comparison
equal
deleted
inserted
replaced
436:e43f8a7aca93 | 437:9a065b8db769 |
---|---|
308 | 308 |
309 // These names deserve some unpicking. The names starting with a "q" | 309 // These names deserve some unpicking. The names starting with a "q" |
310 // are pointers to the query, norm and power vectors; the names | 310 // are pointers to the query, norm and power vectors; the names |
311 // starting with "v" are things that will end up pointing to the | 311 // starting with "v" are things that will end up pointing to the |
312 // actual query point's information. -- CSR, 2007-12-05 | 312 // actual query point's information. -- CSR, 2007-12-05 |
313 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) { | 313 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) { |
314 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); | 314 uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); |
315 qpointers->nvectors = nvectors; | |
315 | 316 |
316 uint32_t sequence_length = spec->qid.sequence_length; | 317 uint32_t sequence_length = spec->qid.sequence_length; |
317 if(!(dbH->flags & O2_FLAG_L2NORM)) { | 318 if(!(dbH->flags & O2_FLAG_L2NORM)) { |
318 error("Database must be L2 normed for sequence query","use -L2NORM"); | 319 error("Database must be L2 normed for sequence query","use -L2NORM"); |
319 } | 320 } |
320 | 321 |
321 if(*nvp < sequence_length) { | 322 if(nvectors < sequence_length) { |
322 error("Query shorter than requested sequence length", "maybe use -l"); | 323 error("Query shorter than requested sequence length", "maybe use -l"); |
323 } | 324 } |
324 | 325 |
325 VERB_LOG(1, "performing norms... "); | 326 VERB_LOG(1, "performing norms... "); |
326 | 327 |
327 *qp = new double[*nvp * dbH->dim]; | 328 *qp = new double[nvectors * dbH->dim]; |
328 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double)); | 329 memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double)); |
329 *qnp = new double[*nvp]; | 330 qpointers->l2norm_data = new double[nvectors]; |
330 audiodb_l2norm_buffer(*qp, dbH->dim, *nvp, *qnp); | 331 audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data); |
331 | 332 |
332 audiodb_sequence_sum(*qnp, *nvp, sequence_length); | 333 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); |
333 audiodb_sequence_sqrt(*qnp, *nvp, sequence_length); | 334 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); |
334 | 335 |
335 if (usingPower) { | 336 if (usingPower) { |
336 *qpp = new double[*nvp]; | 337 qpointers->power_data = new double[nvectors]; |
337 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | 338 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { |
338 error("error seeking to data", powerFileName, "lseek"); | 339 error("error seeking to data", powerFileName, "lseek"); |
339 } | 340 } |
340 int count = read(powerfd, *qpp, *nvp * sizeof(double)); | 341 int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double)); |
341 if (count == -1) { | 342 if (count == -1) { |
342 error("error reading data", powerFileName, "read"); | 343 error("error reading data", powerFileName, "read"); |
343 } | 344 } |
344 if ((unsigned) count != *nvp * sizeof(double)) { | 345 if ((unsigned) count != nvectors * sizeof(double)) { |
345 error("short read", powerFileName); | 346 error("short read", powerFileName); |
346 } | 347 } |
347 | 348 |
348 audiodb_sequence_sum(*qpp, *nvp, sequence_length); | 349 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); |
349 audiodb_sequence_average(*qpp, *nvp, sequence_length); | 350 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); |
350 } | 351 } |
351 | 352 |
352 if (usingTimes) { | 353 if (usingTimes) { |
353 unsigned int k; | 354 unsigned int k; |
354 *mqdp = 0.0; | 355 qpointers->mean_duration = new double[1]; |
355 double *querydurs = new double[*nvp]; | 356 *qpointers->mean_duration = 0.0; |
356 double *timesdata = new double[*nvp*2]; | 357 double *querydurs = new double[nvectors]; |
357 insertTimeStamps(*nvp, timesFile, timesdata); | 358 double *timesdata = new double[2*nvectors]; |
358 for(k = 0; k < *nvp; k++) { | 359 insertTimeStamps(nvectors, timesFile, timesdata); |
360 for(k = 0; k < nvectors; k++) { | |
359 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | 361 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; |
360 *mqdp += querydurs[k]; | 362 *qpointers->mean_duration += querydurs[k]; |
361 } | 363 } |
362 *mqdp /= k; | 364 *qpointers->mean_duration /= k; |
363 | 365 |
364 VERB_LOG(1, "mean query file duration: %f\n", *mqdp); | 366 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); |
365 | 367 |
366 delete [] querydurs; | 368 delete [] querydurs; |
367 delete [] timesdata; | 369 delete [] timesdata; |
368 } | 370 } |
369 | 371 |
370 // Defaults, for exhaustive search (!usingQueryPoint) | 372 // Defaults, for exhaustive search (!usingQueryPoint) |
371 *vqp = *qp; | 373 *vqp = *qp; |
372 *vqnp = *qnp; | 374 qpointers->l2norm = qpointers->l2norm_data; |
373 *vqpp = *qpp; | 375 qpointers->power = qpointers->power_data; |
374 | 376 |
375 if(usingQueryPoint) { | 377 if(usingQueryPoint) { |
376 if( !(queryPoint < *nvp && queryPoint < *nvp - sequence_length + 1) ) { | 378 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { |
377 error("queryPoint >= numVectors-sequence_length+1 in query"); | 379 error("queryPoint >= nvectors-sequence_length+1 in query"); |
378 } else { | 380 } else { |
379 VERB_LOG(1, "query point: %u\n", queryPoint); | 381 VERB_LOG(1, "query point: %u\n", queryPoint); |
380 *vqp = *qp + queryPoint * dbH->dim; | 382 *vqp = *qp + queryPoint * dbH->dim; |
381 *vqnp = *qnp + queryPoint; | 383 qpointers->l2norm = qpointers->l2norm_data + queryPoint; |
382 if (usingPower) { | 384 if (usingPower) { |
383 *vqpp = *qpp + queryPoint; | 385 qpointers->power = qpointers->power_data + queryPoint; |
384 } | 386 } |
385 *nvp = sequence_length; | 387 qpointers->nvectors = sequence_length; |
386 } | 388 } |
387 } | 389 } |
388 } | 390 } |
389 | 391 |
390 // Does the same as set_up_query(...) but from database features instead of from a file | 392 // Does the same as set_up_query(...) but from database features instead of from a file |
391 // Constructs the same outputs as set_up_query | 393 // Constructs the same outputs as set_up_query |
392 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) { | 394 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) { |
393 uint32_t sequence_length = spec->qid.sequence_length; | 395 uint32_t sequence_length = spec->qid.sequence_length; |
394 if(!trackTable) | 396 if(!trackTable) |
395 error("trackTable not initialized","set_up_query_from_key"); | 397 error("trackTable not initialized","set_up_query_from_key"); |
396 | 398 |
397 if(!(dbH->flags & O2_FLAG_L2NORM)) { | 399 if(!(dbH->flags & O2_FLAG_L2NORM)) { |
402 usingPower = true; | 404 usingPower = true; |
403 | 405 |
404 if(dbH->flags & O2_FLAG_TIMES) | 406 if(dbH->flags & O2_FLAG_TIMES) |
405 usingTimes = true; | 407 usingTimes = true; |
406 | 408 |
407 *nvp = trackTable[queryIndex]; | 409 uint32_t nvectors = trackTable[queryIndex]; |
408 if(*nvp < sequence_length) { | 410 qpointers->nvectors = nvectors; |
411 if(nvectors < sequence_length) { | |
409 error("Query shorter than requested sequence length", "maybe use -l"); | 412 error("Query shorter than requested sequence length", "maybe use -l"); |
410 } | 413 } |
411 | 414 |
412 VERB_LOG(1, "performing norms... "); | 415 VERB_LOG(1, "performing norms... "); |
413 | 416 |
424 initInputFile(prefixedString, false); // nommap, file pointer at correct position | 427 initInputFile(prefixedString, false); // nommap, file pointer at correct position |
425 size_t allocatedSize = 0; | 428 size_t allocatedSize = 0; |
426 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize)) | 429 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize)) |
427 error("failed to read data"); // over-writes qp and allocatedSize | 430 error("failed to read data"); // over-writes qp and allocatedSize |
428 // Consistency check on allocated memory and query feature size | 431 // Consistency check on allocated memory and query feature size |
429 if(*nvp*sizeof(double)*dbH->dim != allocatedSize) | 432 if(nvectors*sizeof(double)*dbH->dim != allocatedSize) |
430 error("Query memory allocation failed consitency check","set_up_query_from_key"); | 433 error("Query memory allocation failed consitency check","set_up_query_from_key"); |
431 // Allocated and calculate auxillary sequences: l2norm and power | 434 // Allocated and calculate auxillary sequences: l2norm and power |
432 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp); | 435 init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power); |
433 } | 436 } |
434 else{ // Load from self-contained ADB database | 437 else{ // Load from self-contained ADB database |
435 // Read query feature vectors from database | 438 // Read query feature vectors from database |
436 *qp = NULL; | 439 *qp = NULL; |
437 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); | 440 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); |
438 size_t allocatedSize = 0; | 441 size_t allocatedSize = 0; |
439 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize)) | 442 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize)) |
440 error("failed to read data"); | 443 error("failed to read data"); |
441 // Consistency check on allocated memory and query feature size | 444 // Consistency check on allocated memory and query feature size |
442 if(*nvp*sizeof(double)*dbH->dim != allocatedSize) | 445 if(nvectors*sizeof(double)*dbH->dim != allocatedSize) |
443 error("Query memory allocation failed consitency check","set_up_query_from_key"); | 446 error("Query memory allocation failed consitency check","set_up_query_from_key"); |
444 | 447 |
445 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors | 448 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors |
446 // Copy L2 norm partial-sum coefficients | 449 // Copy L2 norm partial-sum coefficients |
447 assert(*qnp = new double[*nvp]); | 450 assert(qpointers->l2norm_data = new double[nvectors]); |
448 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double)); | 451 memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double)); |
449 audiodb_sequence_sum(*qnp, *nvp, sequence_length); | 452 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); |
450 audiodb_sequence_sqrt(*qnp, *nvp, sequence_length); | 453 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); |
451 | 454 |
452 if( usingPower ){ | 455 if( usingPower ){ |
453 // Copy Power partial-sum coefficients | 456 // Copy Power partial-sum coefficients |
454 assert(*qpp = new double[*nvp]); | 457 assert(qpointers->power_data = new double[nvectors]); |
455 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double)); | 458 memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double)); |
456 audiodb_sequence_sum(*qpp, *nvp, sequence_length); | 459 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); |
457 audiodb_sequence_average(*qpp, *nvp, sequence_length); | 460 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); |
458 } | 461 } |
459 | 462 |
460 if (usingTimes) { | 463 if (usingTimes) { |
461 unsigned int k; | 464 unsigned int k; |
462 *mqdp = 0.0; | 465 qpointers->mean_duration = new double[1]; |
463 double *querydurs = new double[*nvp]; | 466 *qpointers->mean_duration = 0.0; |
464 double *timesdata = new double[*nvp*2]; | 467 double *querydurs = new double[nvectors]; |
468 double *timesdata = new double[nvectors*2]; | |
465 assert(querydurs && timesdata); | 469 assert(querydurs && timesdata); |
466 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double)); | 470 memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double)); |
467 for(k = 0; k < *nvp; k++) { | 471 for(k = 0; k < nvectors; k++) { |
468 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | 472 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; |
469 *mqdp += querydurs[k]; | 473 *qpointers->mean_duration += querydurs[k]; |
470 } | 474 } |
471 *mqdp /= k; | 475 *qpointers->mean_duration /= k; |
472 | 476 |
473 VERB_LOG(1, "mean query file duration: %f\n", *mqdp); | 477 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); |
474 | 478 |
475 delete [] querydurs; | 479 delete [] querydurs; |
476 delete [] timesdata; | 480 delete [] timesdata; |
477 } | 481 } |
478 } | 482 } |
479 | 483 |
480 // Defaults, for exhaustive search (!usingQueryPoint) | 484 // Defaults, for exhaustive search (!usingQueryPoint) |
481 *vqp = *qp; | 485 *vqp = *qp; |
482 *vqnp = *qnp; | 486 qpointers->l2norm = qpointers->l2norm_data; |
483 *vqpp = *qpp; | 487 qpointers->power = qpointers->power_data; |
484 | 488 |
485 if(usingQueryPoint) { | 489 if(usingQueryPoint) { |
486 if( !(queryPoint < *nvp && queryPoint < *nvp - sequence_length + 1) ) { | 490 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { |
487 error("queryPoint >= numVectors-sequence_length+1 in query"); | 491 error("queryPoint >= nvectors-sequence_length+1 in query"); |
488 } else { | 492 } else { |
489 VERB_LOG(1, "query point: %u\n", queryPoint); | 493 VERB_LOG(1, "query point: %u\n", queryPoint); |
490 *vqp = *qp + queryPoint * dbH->dim; | 494 *vqp = *qp + queryPoint * dbH->dim; |
491 *vqnp = *qnp + queryPoint; | 495 qpointers->l2norm = qpointers->l2norm_data + queryPoint; |
492 if (usingPower) { | 496 if (usingPower) { |
493 *vqpp = *qpp + queryPoint; | 497 qpointers->power = qpointers->power_data + queryPoint; |
494 } | 498 } |
495 *nvp = sequence_length; | 499 qpointers->nvectors = sequence_length; |
496 } | 500 } |
497 } | 501 } |
498 } | 502 } |
499 | 503 |
500 | 504 |
501 // FIXME: this is not the right name; we're not actually setting up | 505 // FIXME: this is not the right name; we're not actually setting up |
502 // the database, but copying various bits of it out of mmap()ed tables | 506 // the database, but copying various bits of it out of mmap()ed tables |
503 // in order to reduce seeks. | 507 // in order to reduce seeks. |
504 int audioDB::set_up_db(adb_t *adb, adb_query_spec_t *spec, double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { | 508 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { |
505 uint32_t sequence_length = spec->qid.sequence_length; | 509 uint32_t sequence_length = spec->qid.sequence_length; |
510 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | |
511 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; | |
512 double *times_table = NULL; | |
506 | 513 |
507 *dvp = adb->header->length / (adb->header->dim * sizeof(double)); | 514 *dvp = adb->header->length / (adb->header->dim * sizeof(double)); |
508 *snp = new double[*dvp]; | 515 *snp = new double[*dvp]; |
509 | 516 |
510 double *snpp = *snp, *sppp = 0; | 517 double *snpp = *snp, *sppp = 0; |
511 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); | 518 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); |
512 read_or_goto_error(adb->fd, *snp, *dvp * sizeof(double)); | 519 read_or_goto_error(adb->fd, *snp, *dvp * sizeof(double)); |
513 | 520 |
514 if (usingPower) { | 521 if (using_power) { |
515 if (!(adb->header->flags & O2_FLAG_POWER)) { | 522 if (!(adb->header->flags & O2_FLAG_POWER)) { |
516 goto error; | 523 goto error; |
517 } | 524 } |
518 *spp = new double[*dvp]; | 525 *spp = new double[*dvp]; |
519 sppp = *spp; | 526 sppp = *spp; |
524 for(unsigned int i = 0; i < adb->header->numFiles; i++){ | 531 for(unsigned int i = 0; i < adb->header->numFiles; i++){ |
525 size_t track_length = (*adb->track_lengths)[i]; | 532 size_t track_length = (*adb->track_lengths)[i]; |
526 if(track_length >= sequence_length) { | 533 if(track_length >= sequence_length) { |
527 audiodb_sequence_sum(snpp, track_length, sequence_length); | 534 audiodb_sequence_sum(snpp, track_length, sequence_length); |
528 audiodb_sequence_sqrt(snpp, track_length, sequence_length); | 535 audiodb_sequence_sqrt(snpp, track_length, sequence_length); |
529 if (usingPower) { | 536 if (using_power) { |
530 audiodb_sequence_sum(sppp, track_length, sequence_length); | 537 audiodb_sequence_sum(sppp, track_length, sequence_length); |
531 audiodb_sequence_average(sppp, track_length, sequence_length); | 538 audiodb_sequence_average(sppp, track_length, sequence_length); |
532 } | 539 } |
533 } | 540 } |
534 snpp += track_length; | 541 snpp += track_length; |
535 if (usingPower) { | 542 if (using_power) { |
536 sppp += track_length; | 543 sppp += track_length; |
537 } | 544 } |
538 } | 545 } |
539 | 546 |
540 if (usingTimes) { | 547 if (using_times) { |
541 if(!(adb->header->flags & O2_FLAG_TIMES)) { | 548 if(!(adb->header->flags & O2_FLAG_TIMES)) { |
542 error("query timestamps provided for non-timed database", dbName); | 549 goto error; |
543 } | 550 } |
544 | 551 |
545 *mddp = new double[adb->header->numFiles]; | 552 *mddp = new double[adb->header->numFiles]; |
546 | 553 |
554 times_table = (double *) malloc(2 * *dvp * sizeof(double)); | |
555 if(!times_table) { | |
556 goto error; | |
557 } | |
558 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); | |
559 read_or_goto_error(adb->fd, times_table, 2 * *dvp * sizeof(double)); | |
547 for(unsigned int k = 0; k < adb->header->numFiles; k++) { | 560 for(unsigned int k = 0; k < adb->header->numFiles; k++) { |
548 size_t track_length = (*adb->track_lengths)[k]; | 561 size_t track_length = (*adb->track_lengths)[k]; |
549 unsigned int j; | 562 unsigned int j; |
550 (*mddp)[k] = 0.0; | 563 (*mddp)[k] = 0.0; |
551 for(j = 0; j < track_length; j++) { | 564 for(j = 0; j < track_length; j++) { |
552 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j]; | 565 (*mddp)[k] += times_table[2*j+1] - times_table[2*j]; |
553 } | 566 } |
554 (*mddp)[k] /= j; | 567 (*mddp)[k] /= j; |
555 } | 568 } |
569 | |
570 free(times_table); | |
571 times_table = NULL; | |
556 } | 572 } |
557 | 573 |
558 *vsnp = *snp; | 574 *vsnp = *snp; |
559 *vspp = *spp; | 575 *vspp = *spp; |
560 return 0; | 576 return 0; |
566 if(*spp) { | 582 if(*spp) { |
567 delete [] *spp; | 583 delete [] *spp; |
568 } | 584 } |
569 if(*mddp) { | 585 if(*mddp) { |
570 delete [] *mddp; | 586 delete [] *mddp; |
587 } | |
588 if(times_table) { | |
589 free(times_table); | |
571 } | 590 } |
572 return 1; | 591 return 1; |
573 | 592 |
574 } | 593 } |
575 | 594 |
585 // A reporter has been allocated | 604 // A reporter has been allocated |
586 // | 605 // |
587 // Postconditions: | 606 // Postconditions: |
588 // reporter contains the points and distances that meet the reporter constraints | 607 // reporter contains the points and distances that meet the reporter constraints |
589 | 608 |
590 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){ | 609 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) { |
591 unsigned int dbVectors; | 610 unsigned int dbVectors; |
592 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; | 611 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; |
593 double *meanDBdur = 0; | 612 double *meanDBdur = 0; |
594 | 613 |
595 uint32_t sequence_length = spec->qid.sequence_length; | 614 uint32_t sequence_length = spec->qid.sequence_length; |
615 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | |
596 | 616 |
597 // check pre-conditions | 617 // check pre-conditions |
598 assert(exact_evaluation_queue&&reporter); | 618 assert(exact_evaluation_queue&&reporter); |
599 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate | 619 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate |
600 return; | 620 return; |
601 | 621 |
602 // Compute database info | 622 // Compute database info |
603 // FIXME: we more than likely don't need very much of the database | 623 // FIXME: we more than likely don't need very much of the database |
604 // so make a new method to build these values per-track or, even better, per-point | 624 // so make a new method to build these values per-track or, even better, per-point |
605 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) | 625 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) |
606 if(set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { | 626 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { |
607 error("failed to set up db"); | 627 error("failed to set up db"); |
608 } | 628 } |
609 | 629 |
610 VERB_LOG(1, "matching points..."); | 630 VERB_LOG(1, "matching points..."); |
611 | 631 |
656 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset | 676 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset |
657 } | 677 } |
658 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point | 678 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point |
659 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table | 679 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table |
660 // Test power thresholds before computing distance | 680 // Test power thresholds before computing distance |
661 if( ( !usingPower || audiodb_powers_acceptable(&spec->refine, qpPtr[qPos], sPower[sPos])) && | 681 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], sPower[sPos])) && |
662 ( qPos<numVectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){ | 682 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){ |
663 // Non-large ADB track data is loaded inside power test for efficiency | 683 // Non-large ADB track data is loaded inside power test for efficiency |
664 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ | 684 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ |
665 // On currentTrack change, allocate and load track data | 685 // On currentTrack change, allocate and load track data |
666 currentTrack=pp.trackID; | 686 currentTrack=pp.trackID; |
667 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | 687 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); |
668 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size)) | 688 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size)) |
669 error("failed to read data"); | 689 error("failed to read data"); |
670 } | 690 } |
671 // Compute distance | 691 // Compute distance |
672 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); | 692 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); |
673 double qn = qnPtr[qPos]; | 693 double qn = qpointers->l2norm[qPos]; |
674 double sn = sNorm[sPos]; | 694 double sn = sNorm[sPos]; |
675 switch(spec->params.distance) { | 695 switch(spec->params.distance) { |
676 case ADB_DISTANCE_EUCLIDEAN_NORMED: | 696 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
677 dist = 2 - (2/(qn*sn))*dist; | 697 dist = 2 - (2/(qn*sn))*dist; |
678 break; | 698 break; |
697 SAFE_DELETE_ARRAY(meanDBdur); | 717 SAFE_DELETE_ARRAY(meanDBdur); |
698 } | 718 } |
699 | 719 |
700 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) { | 720 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) { |
701 | 721 |
702 unsigned int numVectors; | |
703 double *query, *query_data; | 722 double *query, *query_data; |
704 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; | 723 adb_qpointers_internal_t qpointers = {0}; |
705 double meanQdur; | 724 |
725 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | |
706 | 726 |
707 if( dbH->flags & O2_FLAG_LARGE_ADB ) | 727 if( dbH->flags & O2_FLAG_LARGE_ADB ) |
708 error("error: LARGE_ADB requires indexed query"); | 728 error("error: LARGE_ADB requires indexed query"); |
709 | 729 |
710 if(query_from_key) | 730 if(query_from_key) |
711 set_up_query_from_key(spec, &query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); | 731 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); |
712 else | 732 else |
713 set_up_query(spec, &query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); | 733 set_up_query(spec, &query_data, &query, &qpointers); |
714 | 734 |
715 unsigned int dbVectors; | 735 unsigned int dbVectors; |
716 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; | 736 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; |
717 double *meanDBdur = 0; | 737 double *meanDBdur = 0; |
718 | 738 |
719 if(set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { | 739 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { |
720 error("failed to set up db"); | 740 error("failed to set up db"); |
721 } | 741 } |
722 | 742 |
723 VERB_LOG(1, "matching tracks..."); | 743 VERB_LOG(1, "matching tracks..."); |
724 | 744 |
725 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop; | 745 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop; |
726 unsigned wL = spec->qid.sequence_length; | 746 unsigned wL = spec->qid.sequence_length; |
727 double **D = 0; // Differences query and target | 747 double **D = 0; // Differences query and target |
728 double **DD = 0; // Matched filter distance | 748 double **DD = 0; // Matched filter distance |
729 | 749 |
730 D = new double*[numVectors]; // pre-allocate | 750 D = new double*[qpointers.nvectors]; // pre-allocate |
731 DD = new double*[numVectors]; | 751 DD = new double*[qpointers.nvectors]; |
732 | 752 |
733 gettimeofday(&tv1, NULL); | 753 gettimeofday(&tv1, NULL); |
734 unsigned processedTracks = 0; | 754 unsigned processedTracks = 0; |
735 off_t trackIndexOffset; | 755 off_t trackIndexOffset; |
736 char nextKey[MAXSTR]; | 756 char nextKey[MAXSTR]; |
769 else{ | 789 else{ |
770 break; | 790 break; |
771 } | 791 } |
772 } | 792 } |
773 | 793 |
774 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 794 trackIndexOffset=trackOffset/dbH->dim; // qpointers.nvectors offset |
775 | 795 |
776 if(audiodb_read_data(adb, dbfid, track, &data_buffer, &data_buffer_size)) | 796 if(audiodb_read_data(adb, dbfid, track, &data_buffer, &data_buffer_size)) |
777 error("failed to read data"); | 797 error("failed to read data"); |
778 if(wL <= trackTable[track]) { // test for short sequences | 798 if(wL <= trackTable[track]) { // test for short sequences |
779 | 799 |
780 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); | 800 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); |
781 | 801 |
782 initialize_arrays(adb, spec, track, numVectors, query, data_buffer, D, DD); | 802 initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); |
783 | 803 |
784 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { | 804 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { |
785 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]); | 805 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], meanDBdur[track]); |
786 } | 806 } |
787 | 807 |
788 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || fabs(meanDBdur[track]-meanQdur) < meanQdur*spec->refine.duration_ratio) { | 808 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || |
809 fabs(meanDBdur[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { | |
789 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { | 810 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { |
790 VERB_LOG(3,"within duration tolerance.\n"); | 811 VERB_LOG(3,"within duration tolerance.\n"); |
791 } | 812 } |
792 | 813 |
793 // Search for minimum distance by shingles (concatenated vectors) | 814 // Search for minimum distance by shingles (concatenated vectors) |
794 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { | 815 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { |
795 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { | 816 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { |
796 double thisDist = 0; | 817 double thisDist = 0; |
797 switch(spec->params.distance) { | 818 switch(spec->params.distance) { |
798 case ADB_DISTANCE_EUCLIDEAN_NORMED: | 819 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
799 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 820 thisDist = 2-(2/(qpointers.l2norm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
800 break; | 821 break; |
801 case ADB_DISTANCE_EUCLIDEAN: | 822 case ADB_DISTANCE_EUCLIDEAN: |
802 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; | 823 thisDist = qpointers.l2norm[j]*qpointers.l2norm[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; |
803 break; | 824 break; |
804 case ADB_DISTANCE_DOT_PRODUCT: | 825 case ADB_DISTANCE_DOT_PRODUCT: |
805 thisDist = DD[j][k]; | 826 thisDist = DD[j][k]; |
806 break; | 827 break; |
807 } | 828 } |
808 // Power test | 829 // Power test |
809 if ((!usingPower) || audiodb_powers_acceptable(&spec->refine, qpPtr[j], sPower[trackIndexOffset + k])) { | 830 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], sPower[trackIndexOffset + k])) { |
810 // radius test | 831 // radius test |
811 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || | 832 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || |
812 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { | 833 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { |
813 adb_result_t r; | 834 adb_result_t r; |
814 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE; | 835 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE; |
819 } | 840 } |
820 } | 841 } |
821 } | 842 } |
822 } | 843 } |
823 } // Duration match | 844 } // Duration match |
824 audiodb_delete_arrays(track, numVectors, D, DD); | 845 audiodb_delete_arrays(track, qpointers.nvectors, D, DD); |
825 } | 846 } |
826 } | 847 } |
827 | 848 |
828 free(data_buffer); | 849 free(data_buffer); |
829 | 850 |
833 (tv1.tv_sec*1000 + tv1.tv_usec/1000)) | 854 (tv1.tv_sec*1000 + tv1.tv_usec/1000)) |
834 | 855 |
835 // Clean up | 856 // Clean up |
836 if(query_data) | 857 if(query_data) |
837 delete[] query_data; | 858 delete[] query_data; |
838 if(qNorm) | 859 if(qpointers.l2norm_data) |
839 delete[] qNorm; | 860 delete[] qpointers.l2norm_data; |
861 if(qpointers.power_data) | |
862 delete[] qpointers.power_data; | |
863 if(qpointers.mean_duration) | |
864 delete[] qpointers.mean_duration; | |
840 if(sNorm) | 865 if(sNorm) |
841 delete[] sNorm; | 866 delete[] sNorm; |
842 if(qPower) | |
843 delete[] qPower; | |
844 if(sPower) | 867 if(sPower) |
845 delete[] sPower; | 868 delete[] sPower; |
846 if(D) | 869 if(D) |
847 delete[] D; | 870 delete[] D; |
848 if(DD) | 871 if(DD) |