comparison query.cpp @ 438:8c1d8a40db91 api-inversion

audioDB::set_up_db can use an adb_qpointers_internal_t... ... make it so.
author mas01cr
date Wed, 24 Dec 2008 10:56:12 +0000
parents 9a065b8db769
children 5294ea1b1bf2
comparison
equal deleted inserted replaced
437:9a065b8db769 438:8c1d8a40db91
503 503
504 504
505 // FIXME: this is not the right name; we're not actually setting up 505 // FIXME: this is not the right name; we're not actually setting up
506 // the database, but copying various bits of it out of mmap()ed tables 506 // the database, but copying various bits of it out of mmap()ed tables
507 // in order to reduce seeks. 507 // in order to reduce seeks.
508 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { 508 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
509 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
509 uint32_t sequence_length = spec->qid.sequence_length; 510 uint32_t sequence_length = spec->qid.sequence_length;
511
510 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); 512 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
511 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; 513 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
512 double *times_table = NULL; 514 double *times_table = NULL;
513 515
514 *dvp = adb->header->length / (adb->header->dim * sizeof(double)); 516
515 *snp = new double[*dvp]; 517 dbpointers->nvectors = nvectors;
516 518 dbpointers->l2norm_data = new double[nvectors];
517 double *snpp = *snp, *sppp = 0; 519
520 double *snpp = dbpointers->l2norm_data, *sppp = 0;
518 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); 521 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
519 read_or_goto_error(adb->fd, *snp, *dvp * sizeof(double)); 522 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
520 523
521 if (using_power) { 524 if (using_power) {
522 if (!(adb->header->flags & O2_FLAG_POWER)) { 525 if (!(adb->header->flags & O2_FLAG_POWER)) {
523 goto error; 526 goto error;
524 } 527 }
525 *spp = new double[*dvp]; 528 dbpointers->power_data = new double[nvectors];
526 sppp = *spp; 529 sppp = dbpointers->power_data;
527 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET); 530 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
528 read_or_goto_error(adb->fd, *spp, *dvp * sizeof(double)); 531 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
529 } 532 }
530 533
531 for(unsigned int i = 0; i < adb->header->numFiles; i++){ 534 for(unsigned int i = 0; i < adb->header->numFiles; i++){
532 size_t track_length = (*adb->track_lengths)[i]; 535 size_t track_length = (*adb->track_lengths)[i];
533 if(track_length >= sequence_length) { 536 if(track_length >= sequence_length) {
547 if (using_times) { 550 if (using_times) {
548 if(!(adb->header->flags & O2_FLAG_TIMES)) { 551 if(!(adb->header->flags & O2_FLAG_TIMES)) {
549 goto error; 552 goto error;
550 } 553 }
551 554
552 *mddp = new double[adb->header->numFiles]; 555 dbpointers->mean_duration = new double[adb->header->numFiles];
553 556
554 times_table = (double *) malloc(2 * *dvp * sizeof(double)); 557 times_table = (double *) malloc(2 * nvectors * sizeof(double));
555 if(!times_table) { 558 if(!times_table) {
556 goto error; 559 goto error;
557 } 560 }
558 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); 561 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
559 read_or_goto_error(adb->fd, times_table, 2 * *dvp * sizeof(double)); 562 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
560 for(unsigned int k = 0; k < adb->header->numFiles; k++) { 563 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
561 size_t track_length = (*adb->track_lengths)[k]; 564 size_t track_length = (*adb->track_lengths)[k];
562 unsigned int j; 565 unsigned int j;
563 (*mddp)[k] = 0.0; 566 dbpointers->mean_duration[k] = 0.0;
564 for(j = 0; j < track_length; j++) { 567 for(j = 0; j < track_length; j++) {
565 (*mddp)[k] += times_table[2*j+1] - times_table[2*j]; 568 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
566 } 569 }
567 (*mddp)[k] /= j; 570 dbpointers->mean_duration[k] /= j;
568 } 571 }
569 572
570 free(times_table); 573 free(times_table);
571 times_table = NULL; 574 times_table = NULL;
572 } 575 }
573 576
574 *vsnp = *snp; 577 dbpointers->l2norm = dbpointers->l2norm_data;
575 *vspp = *spp; 578 dbpointers->power = dbpointers->power_data;
576 return 0; 579 return 0;
577 580
578 error: 581 error:
579 if(*snp) { 582 if(dbpointers->l2norm_data) {
580 delete [] *snp; 583 delete [] dbpointers->l2norm_data;
581 } 584 }
582 if(*spp) { 585 if(dbpointers->power_data) {
583 delete [] *spp; 586 delete [] dbpointers->power_data;
584 } 587 }
585 if(*mddp) { 588 if(dbpointers->mean_duration) {
586 delete [] *mddp; 589 delete [] dbpointers->mean_duration;
587 } 590 }
588 if(times_table) { 591 if(times_table) {
589 free(times_table); 592 free(times_table);
590 } 593 }
591 return 1; 594 return 1;
605 // 608 //
606 // Postconditions: 609 // Postconditions:
607 // reporter contains the points and distances that meet the reporter constraints 610 // reporter contains the points and distances that meet the reporter constraints
608 611
609 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) { 612 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) {
610 unsigned int dbVectors; 613 adb_qpointers_internal_t dbpointers = {0};
611 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
612 double *meanDBdur = 0;
613 614
614 uint32_t sequence_length = spec->qid.sequence_length; 615 uint32_t sequence_length = spec->qid.sequence_length;
615 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); 616 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
616 617
617 // check pre-conditions 618 // check pre-conditions
621 622
622 // Compute database info 623 // Compute database info
623 // FIXME: we more than likely don't need very much of the database 624 // FIXME: we more than likely don't need very much of the database
624 // so make a new method to build these values per-track or, even better, per-point 625 // so make a new method to build these values per-track or, even better, per-point
625 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) 626 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
626 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { 627 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
627 error("failed to set up db"); 628 error("failed to set up db");
628 } 629 }
629 630
630 VERB_LOG(1, "matching points..."); 631 VERB_LOG(1, "matching points...");
631 632
651 if(currentTrack!=pp.trackID){ 652 if(currentTrack!=pp.trackID){
652 char* prefixedString = new char[O2_MAXFILESTR]; 653 char* prefixedString = new char[O2_MAXFILESTR];
653 char* tmpStr = prefixedString; 654 char* tmpStr = prefixedString;
654 // On currentTrack change, allocate and load track data 655 // On currentTrack change, allocate and load track data
655 currentTrack=pp.trackID; 656 currentTrack=pp.trackID;
656 SAFE_DELETE_ARRAY(sNorm); 657 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
657 SAFE_DELETE_ARRAY(sPower); 658 SAFE_DELETE_ARRAY(dbpointers.power_data);
658 if(infid>0) 659 if(infid>0)
659 close(infid); 660 close(infid);
660 // Open and check dimensions of feature file 661 // Open and check dimensions of feature file
661 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); 662 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
662 prefix_name((char ** const) &prefixedString, adb_feature_root); 663 prefix_name((char ** const) &prefixedString, adb_feature_root);
665 initInputFile(prefixedString, false); // nommap, file pointer at correct position 666 initInputFile(prefixedString, false); // nommap, file pointer at correct position
666 // Load the feature vector data for current track into data_buffer 667 // Load the feature vector data for current track into data_buffer
667 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size)) 668 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
668 error("failed to read data"); 669 error("failed to read data");
669 // Load power and calculate power and l2norm sequence sums 670 // Load power and calculate power and l2norm sequence sums
670 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr); 671 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
671 } 672 }
672 } 673 }
673 else{ 674 else{
674 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables 675 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables
675 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset 676 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
676 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset 677 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
677 } 678 }
678 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point 679 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
679 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table 680 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
680 // Test power thresholds before computing distance 681 // Test power thresholds before computing distance
681 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], sPower[sPos])) && 682 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
682 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){ 683 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){
683 // Non-large ADB track data is loaded inside power test for efficiency 684 // Non-large ADB track data is loaded inside power test for efficiency
684 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ 685 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
685 // On currentTrack change, allocate and load track data 686 // On currentTrack change, allocate and load track data
686 currentTrack=pp.trackID; 687 currentTrack=pp.trackID;
689 error("failed to read data"); 690 error("failed to read data");
690 } 691 }
691 // Compute distance 692 // Compute distance
692 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); 693 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length);
693 double qn = qpointers->l2norm[qPos]; 694 double qn = qpointers->l2norm[qPos];
694 double sn = sNorm[sPos]; 695 double sn = dbpointers.l2norm[sPos];
695 switch(spec->params.distance) { 696 switch(spec->params.distance) {
696 case ADB_DISTANCE_EUCLIDEAN_NORMED: 697 case ADB_DISTANCE_EUCLIDEAN_NORMED:
697 dist = 2 - (2/(qn*sn))*dist; 698 dist = 2 - (2/(qn*sn))*dist;
698 break; 699 break;
699 case ADB_DISTANCE_EUCLIDEAN: 700 case ADB_DISTANCE_EUCLIDEAN:
710 } 711 }
711 } 712 }
712 exact_evaluation_queue->pop(); 713 exact_evaluation_queue->pop();
713 } 714 }
714 // Cleanup 715 // Cleanup
715 SAFE_DELETE_ARRAY(sNorm); 716 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
716 SAFE_DELETE_ARRAY(sPower); 717 SAFE_DELETE_ARRAY(dbpointers.power_data);
717 SAFE_DELETE_ARRAY(meanDBdur); 718 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
718 } 719 }
719 720
720 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) { 721 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) {
721 722
722 double *query, *query_data; 723 double *query, *query_data;
723 adb_qpointers_internal_t qpointers = {0}; 724 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
724 725
725 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); 726 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
726 727
727 if( dbH->flags & O2_FLAG_LARGE_ADB ) 728 if( dbH->flags & O2_FLAG_LARGE_ADB )
728 error("error: LARGE_ADB requires indexed query"); 729 error("error: LARGE_ADB requires indexed query");
730 if(query_from_key) 731 if(query_from_key)
731 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); 732 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex);
732 else 733 else
733 set_up_query(spec, &query_data, &query, &qpointers); 734 set_up_query(spec, &query_data, &query, &qpointers);
734 735
735 unsigned int dbVectors; 736 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
736 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
737 double *meanDBdur = 0;
738
739 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) {
740 error("failed to set up db"); 737 error("failed to set up db");
741 } 738 }
742 739
743 VERB_LOG(1, "matching tracks..."); 740 VERB_LOG(1, "matching tracks...");
744 741
800 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); 797 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
801 798
802 initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); 799 initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
803 800
804 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { 801 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
805 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], meanDBdur[track]); 802 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], dbpointers.mean_duration[track]);
806 } 803 }
807 804
808 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || 805 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
809 fabs(meanDBdur[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { 806 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
810 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { 807 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
811 VERB_LOG(3,"within duration tolerance.\n"); 808 VERB_LOG(3,"within duration tolerance.\n");
812 } 809 }
813 810
814 // Search for minimum distance by shingles (concatenated vectors) 811 // Search for minimum distance by shingles (concatenated vectors)
815 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { 812 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
816 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { 813 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
817 double thisDist = 0; 814 double thisDist = 0;
815 double qn = qpointers.l2norm[j];
816 double sn = dbpointers.l2norm[trackIndexOffset + k];
818 switch(spec->params.distance) { 817 switch(spec->params.distance) {
819 case ADB_DISTANCE_EUCLIDEAN_NORMED: 818 case ADB_DISTANCE_EUCLIDEAN_NORMED:
820 thisDist = 2-(2/(qpointers.l2norm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; 819 thisDist = 2-(2/(qn*sn))*DD[j][k];
821 break; 820 break;
822 case ADB_DISTANCE_EUCLIDEAN: 821 case ADB_DISTANCE_EUCLIDEAN:
823 thisDist = qpointers.l2norm[j]*qpointers.l2norm[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; 822 thisDist = qn*qn + sn*sn - 2*DD[j][k];
824 break; 823 break;
825 case ADB_DISTANCE_DOT_PRODUCT: 824 case ADB_DISTANCE_DOT_PRODUCT:
826 thisDist = DD[j][k]; 825 thisDist = DD[j][k];
827 break; 826 break;
828 } 827 }
829 // Power test 828 // Power test
830 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], sPower[trackIndexOffset + k])) { 829 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
831 // radius test 830 // radius test
832 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || 831 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
833 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { 832 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
834 adb_result_t r; 833 adb_result_t r;
835 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE; 834 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE;
860 delete[] qpointers.l2norm_data; 859 delete[] qpointers.l2norm_data;
861 if(qpointers.power_data) 860 if(qpointers.power_data)
862 delete[] qpointers.power_data; 861 delete[] qpointers.power_data;
863 if(qpointers.mean_duration) 862 if(qpointers.mean_duration)
864 delete[] qpointers.mean_duration; 863 delete[] qpointers.mean_duration;
865 if(sNorm) 864 if(dbpointers.power_data)
866 delete[] sNorm; 865 delete[] dbpointers.power_data;
867 if(sPower) 866 if(dbpointers.l2norm_data)
868 delete[] sPower; 867 delete[] dbpointers.l2norm_data;
869 if(D) 868 if(D)
870 delete[] D; 869 delete[] D;
871 if(DD) 870 if(DD)
872 delete[] DD; 871 delete[] DD;
873 if(meanDBdur) 872 if(dbpointers.mean_duration)
874 delete[] meanDBdur; 873 delete[] dbpointers.mean_duration;
875 } 874 }