Mercurial > hg > audiodb
comparison query.cpp @ 438:8c1d8a40db91 api-inversion
audioDB::set_up_db can use an adb_qpointers_internal_t...
... make it so.
author | mas01cr |
---|---|
date | Wed, 24 Dec 2008 10:56:12 +0000 |
parents | 9a065b8db769 |
children | 5294ea1b1bf2 |
comparison
equal
deleted
inserted
replaced
437:9a065b8db769 | 438:8c1d8a40db91 |
---|---|
503 | 503 |
504 | 504 |
505 // FIXME: this is not the right name; we're not actually setting up | 505 // FIXME: this is not the right name; we're not actually setting up |
506 // the database, but copying various bits of it out of mmap()ed tables | 506 // the database, but copying various bits of it out of mmap()ed tables |
507 // in order to reduce seeks. | 507 // in order to reduce seeks. |
508 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { | 508 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { |
509 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double)); | |
509 uint32_t sequence_length = spec->qid.sequence_length; | 510 uint32_t sequence_length = spec->qid.sequence_length; |
511 | |
510 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | 512 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
511 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; | 513 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; |
512 double *times_table = NULL; | 514 double *times_table = NULL; |
513 | 515 |
514 *dvp = adb->header->length / (adb->header->dim * sizeof(double)); | 516 |
515 *snp = new double[*dvp]; | 517 dbpointers->nvectors = nvectors; |
516 | 518 dbpointers->l2norm_data = new double[nvectors]; |
517 double *snpp = *snp, *sppp = 0; | 519 |
520 double *snpp = dbpointers->l2norm_data, *sppp = 0; | |
518 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); | 521 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); |
519 read_or_goto_error(adb->fd, *snp, *dvp * sizeof(double)); | 522 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double)); |
520 | 523 |
521 if (using_power) { | 524 if (using_power) { |
522 if (!(adb->header->flags & O2_FLAG_POWER)) { | 525 if (!(adb->header->flags & O2_FLAG_POWER)) { |
523 goto error; | 526 goto error; |
524 } | 527 } |
525 *spp = new double[*dvp]; | 528 dbpointers->power_data = new double[nvectors]; |
526 sppp = *spp; | 529 sppp = dbpointers->power_data; |
527 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET); | 530 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET); |
528 read_or_goto_error(adb->fd, *spp, *dvp * sizeof(double)); | 531 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double)); |
529 } | 532 } |
530 | 533 |
531 for(unsigned int i = 0; i < adb->header->numFiles; i++){ | 534 for(unsigned int i = 0; i < adb->header->numFiles; i++){ |
532 size_t track_length = (*adb->track_lengths)[i]; | 535 size_t track_length = (*adb->track_lengths)[i]; |
533 if(track_length >= sequence_length) { | 536 if(track_length >= sequence_length) { |
547 if (using_times) { | 550 if (using_times) { |
548 if(!(adb->header->flags & O2_FLAG_TIMES)) { | 551 if(!(adb->header->flags & O2_FLAG_TIMES)) { |
549 goto error; | 552 goto error; |
550 } | 553 } |
551 | 554 |
552 *mddp = new double[adb->header->numFiles]; | 555 dbpointers->mean_duration = new double[adb->header->numFiles]; |
553 | 556 |
554 times_table = (double *) malloc(2 * *dvp * sizeof(double)); | 557 times_table = (double *) malloc(2 * nvectors * sizeof(double)); |
555 if(!times_table) { | 558 if(!times_table) { |
556 goto error; | 559 goto error; |
557 } | 560 } |
558 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); | 561 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); |
559 read_or_goto_error(adb->fd, times_table, 2 * *dvp * sizeof(double)); | 562 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double)); |
560 for(unsigned int k = 0; k < adb->header->numFiles; k++) { | 563 for(unsigned int k = 0; k < adb->header->numFiles; k++) { |
561 size_t track_length = (*adb->track_lengths)[k]; | 564 size_t track_length = (*adb->track_lengths)[k]; |
562 unsigned int j; | 565 unsigned int j; |
563 (*mddp)[k] = 0.0; | 566 dbpointers->mean_duration[k] = 0.0; |
564 for(j = 0; j < track_length; j++) { | 567 for(j = 0; j < track_length; j++) { |
565 (*mddp)[k] += times_table[2*j+1] - times_table[2*j]; | 568 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j]; |
566 } | 569 } |
567 (*mddp)[k] /= j; | 570 dbpointers->mean_duration[k] /= j; |
568 } | 571 } |
569 | 572 |
570 free(times_table); | 573 free(times_table); |
571 times_table = NULL; | 574 times_table = NULL; |
572 } | 575 } |
573 | 576 |
574 *vsnp = *snp; | 577 dbpointers->l2norm = dbpointers->l2norm_data; |
575 *vspp = *spp; | 578 dbpointers->power = dbpointers->power_data; |
576 return 0; | 579 return 0; |
577 | 580 |
578 error: | 581 error: |
579 if(*snp) { | 582 if(dbpointers->l2norm_data) { |
580 delete [] *snp; | 583 delete [] dbpointers->l2norm_data; |
581 } | 584 } |
582 if(*spp) { | 585 if(dbpointers->power_data) { |
583 delete [] *spp; | 586 delete [] dbpointers->power_data; |
584 } | 587 } |
585 if(*mddp) { | 588 if(dbpointers->mean_duration) { |
586 delete [] *mddp; | 589 delete [] dbpointers->mean_duration; |
587 } | 590 } |
588 if(times_table) { | 591 if(times_table) { |
589 free(times_table); | 592 free(times_table); |
590 } | 593 } |
591 return 1; | 594 return 1; |
605 // | 608 // |
606 // Postconditions: | 609 // Postconditions: |
607 // reporter contains the points and distances that meet the reporter constraints | 610 // reporter contains the points and distances that meet the reporter constraints |
608 | 611 |
609 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) { | 612 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) { |
610 unsigned int dbVectors; | 613 adb_qpointers_internal_t dbpointers = {0}; |
611 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; | |
612 double *meanDBdur = 0; | |
613 | 614 |
614 uint32_t sequence_length = spec->qid.sequence_length; | 615 uint32_t sequence_length = spec->qid.sequence_length; |
615 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | 616 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
616 | 617 |
617 // check pre-conditions | 618 // check pre-conditions |
621 | 622 |
622 // Compute database info | 623 // Compute database info |
623 // FIXME: we more than likely don't need very much of the database | 624 // FIXME: we more than likely don't need very much of the database |
624 // so make a new method to build these values per-track or, even better, per-point | 625 // so make a new method to build these values per-track or, even better, per-point |
625 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) | 626 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) |
626 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { | 627 if(audiodb_set_up_db(adb, spec, &dbpointers)) { |
627 error("failed to set up db"); | 628 error("failed to set up db"); |
628 } | 629 } |
629 | 630 |
630 VERB_LOG(1, "matching points..."); | 631 VERB_LOG(1, "matching points..."); |
631 | 632 |
651 if(currentTrack!=pp.trackID){ | 652 if(currentTrack!=pp.trackID){ |
652 char* prefixedString = new char[O2_MAXFILESTR]; | 653 char* prefixedString = new char[O2_MAXFILESTR]; |
653 char* tmpStr = prefixedString; | 654 char* tmpStr = prefixedString; |
654 // On currentTrack change, allocate and load track data | 655 // On currentTrack change, allocate and load track data |
655 currentTrack=pp.trackID; | 656 currentTrack=pp.trackID; |
656 SAFE_DELETE_ARRAY(sNorm); | 657 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
657 SAFE_DELETE_ARRAY(sPower); | 658 SAFE_DELETE_ARRAY(dbpointers.power_data); |
658 if(infid>0) | 659 if(infid>0) |
659 close(infid); | 660 close(infid); |
660 // Open and check dimensions of feature file | 661 // Open and check dimensions of feature file |
661 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); | 662 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); |
662 prefix_name((char ** const) &prefixedString, adb_feature_root); | 663 prefix_name((char ** const) &prefixedString, adb_feature_root); |
665 initInputFile(prefixedString, false); // nommap, file pointer at correct position | 666 initInputFile(prefixedString, false); // nommap, file pointer at correct position |
666 // Load the feature vector data for current track into data_buffer | 667 // Load the feature vector data for current track into data_buffer |
667 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size)) | 668 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size)) |
668 error("failed to read data"); | 669 error("failed to read data"); |
669 // Load power and calculate power and l2norm sequence sums | 670 // Load power and calculate power and l2norm sequence sums |
670 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr); | 671 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power); |
671 } | 672 } |
672 } | 673 } |
673 else{ | 674 else{ |
674 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables | 675 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables |
675 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset | 676 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset |
676 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset | 677 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset |
677 } | 678 } |
678 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point | 679 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point |
679 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table | 680 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table |
680 // Test power thresholds before computing distance | 681 // Test power thresholds before computing distance |
681 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], sPower[sPos])) && | 682 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && |
682 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){ | 683 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){ |
683 // Non-large ADB track data is loaded inside power test for efficiency | 684 // Non-large ADB track data is loaded inside power test for efficiency |
684 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ | 685 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ |
685 // On currentTrack change, allocate and load track data | 686 // On currentTrack change, allocate and load track data |
686 currentTrack=pp.trackID; | 687 currentTrack=pp.trackID; |
689 error("failed to read data"); | 690 error("failed to read data"); |
690 } | 691 } |
691 // Compute distance | 692 // Compute distance |
692 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); | 693 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length); |
693 double qn = qpointers->l2norm[qPos]; | 694 double qn = qpointers->l2norm[qPos]; |
694 double sn = sNorm[sPos]; | 695 double sn = dbpointers.l2norm[sPos]; |
695 switch(spec->params.distance) { | 696 switch(spec->params.distance) { |
696 case ADB_DISTANCE_EUCLIDEAN_NORMED: | 697 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
697 dist = 2 - (2/(qn*sn))*dist; | 698 dist = 2 - (2/(qn*sn))*dist; |
698 break; | 699 break; |
699 case ADB_DISTANCE_EUCLIDEAN: | 700 case ADB_DISTANCE_EUCLIDEAN: |
710 } | 711 } |
711 } | 712 } |
712 exact_evaluation_queue->pop(); | 713 exact_evaluation_queue->pop(); |
713 } | 714 } |
714 // Cleanup | 715 // Cleanup |
715 SAFE_DELETE_ARRAY(sNorm); | 716 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
716 SAFE_DELETE_ARRAY(sPower); | 717 SAFE_DELETE_ARRAY(dbpointers.power_data); |
717 SAFE_DELETE_ARRAY(meanDBdur); | 718 SAFE_DELETE_ARRAY(dbpointers.mean_duration); |
718 } | 719 } |
719 | 720 |
720 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) { | 721 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) { |
721 | 722 |
722 double *query, *query_data; | 723 double *query, *query_data; |
723 adb_qpointers_internal_t qpointers = {0}; | 724 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0}; |
724 | 725 |
725 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | 726 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
726 | 727 |
727 if( dbH->flags & O2_FLAG_LARGE_ADB ) | 728 if( dbH->flags & O2_FLAG_LARGE_ADB ) |
728 error("error: LARGE_ADB requires indexed query"); | 729 error("error: LARGE_ADB requires indexed query"); |
730 if(query_from_key) | 731 if(query_from_key) |
731 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); | 732 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); |
732 else | 733 else |
733 set_up_query(spec, &query_data, &query, &qpointers); | 734 set_up_query(spec, &query_data, &query, &qpointers); |
734 | 735 |
735 unsigned int dbVectors; | 736 if(audiodb_set_up_db(adb, spec, &dbpointers)) { |
736 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; | |
737 double *meanDBdur = 0; | |
738 | |
739 if(audiodb_set_up_db(adb, spec, &sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors)) { | |
740 error("failed to set up db"); | 737 error("failed to set up db"); |
741 } | 738 } |
742 | 739 |
743 VERB_LOG(1, "matching tracks..."); | 740 VERB_LOG(1, "matching tracks..."); |
744 | 741 |
800 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); | 797 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); |
801 | 798 |
802 initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); | 799 initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); |
803 | 800 |
804 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { | 801 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { |
805 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], meanDBdur[track]); | 802 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], dbpointers.mean_duration[track]); |
806 } | 803 } |
807 | 804 |
808 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || | 805 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || |
809 fabs(meanDBdur[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { | 806 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { |
810 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { | 807 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { |
811 VERB_LOG(3,"within duration tolerance.\n"); | 808 VERB_LOG(3,"within duration tolerance.\n"); |
812 } | 809 } |
813 | 810 |
814 // Search for minimum distance by shingles (concatenated vectors) | 811 // Search for minimum distance by shingles (concatenated vectors) |
815 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { | 812 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { |
816 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { | 813 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { |
817 double thisDist = 0; | 814 double thisDist = 0; |
815 double qn = qpointers.l2norm[j]; | |
816 double sn = dbpointers.l2norm[trackIndexOffset + k]; | |
818 switch(spec->params.distance) { | 817 switch(spec->params.distance) { |
819 case ADB_DISTANCE_EUCLIDEAN_NORMED: | 818 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
820 thisDist = 2-(2/(qpointers.l2norm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 819 thisDist = 2-(2/(qn*sn))*DD[j][k]; |
821 break; | 820 break; |
822 case ADB_DISTANCE_EUCLIDEAN: | 821 case ADB_DISTANCE_EUCLIDEAN: |
823 thisDist = qpointers.l2norm[j]*qpointers.l2norm[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; | 822 thisDist = qn*qn + sn*sn - 2*DD[j][k]; |
824 break; | 823 break; |
825 case ADB_DISTANCE_DOT_PRODUCT: | 824 case ADB_DISTANCE_DOT_PRODUCT: |
826 thisDist = DD[j][k]; | 825 thisDist = DD[j][k]; |
827 break; | 826 break; |
828 } | 827 } |
829 // Power test | 828 // Power test |
830 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], sPower[trackIndexOffset + k])) { | 829 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) { |
831 // radius test | 830 // radius test |
832 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || | 831 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || |
833 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { | 832 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { |
834 adb_result_t r; | 833 adb_result_t r; |
835 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE; | 834 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE; |
860 delete[] qpointers.l2norm_data; | 859 delete[] qpointers.l2norm_data; |
861 if(qpointers.power_data) | 860 if(qpointers.power_data) |
862 delete[] qpointers.power_data; | 861 delete[] qpointers.power_data; |
863 if(qpointers.mean_duration) | 862 if(qpointers.mean_duration) |
864 delete[] qpointers.mean_duration; | 863 delete[] qpointers.mean_duration; |
865 if(sNorm) | 864 if(dbpointers.power_data) |
866 delete[] sNorm; | 865 delete[] dbpointers.power_data; |
867 if(sPower) | 866 if(dbpointers.l2norm_data) |
868 delete[] sPower; | 867 delete[] dbpointers.l2norm_data; |
869 if(D) | 868 if(D) |
870 delete[] D; | 869 delete[] D; |
871 if(DD) | 870 if(DD) |
872 delete[] DD; | 871 delete[] DD; |
873 if(meanDBdur) | 872 if(dbpointers.mean_duration) |
874 delete[] meanDBdur; | 873 delete[] dbpointers.mean_duration; |
875 } | 874 } |