comparison query.cpp @ 444:4fe90fd568fc api-inversion

No more audioDB::set_up_query{,_from_key}. Go through audiodb_query_spec_qpointers() instead. It's a little bit horrible, but less horrible than two almost-identical separate functions...
author mas01cr
date Wed, 24 Dec 2008 10:56:37 +0000
parents cb44e57a96fa
children d1771f436ff7
comparison
equal deleted inserted replaced
443:cb44e57a96fa 444:4fe90fd568fc
2 #include "reporter.h" 2 #include "reporter.h"
3 3
4 #include "audioDB-internals.h" 4 #include "audioDB-internals.h"
5 #include "accumulators.h" 5 #include "accumulators.h"
6 6
7 static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) { 7 bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
10 return false; 10 return false;
11 } 11 }
12 } 12 }
25 initTables(dbName); 25 initTables(dbName);
26 else 26 else
27 initTables(dbName, inFile); 27 initTables(dbName, inFile);
28 28
29 adb_query_spec_t qspec; 29 adb_query_spec_t qspec;
30 adb_datum_t datum = {0};
30 31
31 qspec.refine.flags = 0; 32 qspec.refine.flags = 0;
32 /* FIXME: trackFile / ADB_REFINE_KEYLIST */ 33 /* FIXME: trackFile / ADB_REFINE_KEYLIST */
33 if(radius) { 34 if(radius) {
34 qspec.refine.flags |= ADB_REFINE_RADIUS; 35 qspec.refine.flags |= ADB_REFINE_RADIUS;
51 qspec.refine.hopsize = sequenceHop; 52 qspec.refine.hopsize = sequenceHop;
52 if(sequenceHop != 1) { 53 if(sequenceHop != 1) {
53 qspec.refine.flags |= ADB_REFINE_HOP_SIZE; 54 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
54 } 55 }
55 56
56 /* FIXME qspec.qid.datum */ 57 if(query_from_key) {
58 datum.key = key;
59 } else {
60 int fd;
61 struct stat st;
62
63 /* FIXME: around here there are all sorts of hideous leaks. */
64 fd = open(inFile, O_RDONLY);
65 if(fd < 0) {
66 error("failed to open feature file", inFile);
67 }
68 fstat(fd, &st);
69 read(fd, &datum.dim, sizeof(uint32_t));
70 datum.nvectors = (st.st_size - sizeof(uint32_t)) / (datum.dim * sizeof(double));
71 datum.data = (double *) malloc(st.st_size - sizeof(uint32_t));
72 read(fd, datum.data, st.st_size - sizeof(uint32_t));
73 close(fd);
74 if(usingPower) {
75 uint32_t one;
76 fd = open(powerFileName, O_RDONLY);
77 if(fd < 0) {
78 error("failed to open power file", powerFileName);
79 }
80 read(fd, &one, sizeof(uint32_t));
81 if(one != 1) {
82 error("malformed power file dimensionality", powerFileName);
83 }
84 datum.power = (double *) malloc(datum.nvectors * sizeof(double));
85 if(read(fd, datum.power, datum.nvectors * sizeof(double)) != (ssize_t) (datum.nvectors * sizeof(double))) {
86 error("malformed power file", powerFileName);
87 }
88 close(fd);
89 }
90 if(usingTimes) {
91 datum.times = (double *) malloc(2 * datum.nvectors * sizeof(double));
92 insertTimeStamps(datum.nvectors, timesFile, datum.times);
93 }
94 }
95
96 qspec.qid.datum = &datum;
57 qspec.qid.sequence_length = sequenceLength; 97 qspec.qid.sequence_length = sequenceLength;
58 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE; 98 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE;
59 qspec.qid.sequence_start = queryPoint; 99 qspec.qid.sequence_start = queryPoint;
60 100
61 switch(queryType) { 101 switch(queryType) {
305 if (!timesFile->eof()) { 345 if (!timesFile->eof()) {
306 error("too many timepoints in times file", timesFileName); 346 error("too many timepoints in times file", timesFileName);
307 } 347 }
308 } 348 }
309 349
310 static int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { 350 int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
311 adb_datum_t *datum; 351 adb_datum_t *datum;
312 adb_datum_t d = {0}; 352 adb_datum_t d = {0};
313 uint32_t nvectors; 353 uint32_t nvectors;
314 uint32_t sequence_length; 354 uint32_t sequence_length;
315 uint32_t sequence_start; 355 uint32_t sequence_start;
379 419
380 /* Now we have a full(ish) datum, compute all the qpointery stuff 420 /* Now we have a full(ish) datum, compute all the qpointery stuff
381 that we care about (l2norm/power/mean duration). (This bit could 421 that we care about (l2norm/power/mean duration). (This bit could
382 conceivably become a new function) */ 422 conceivably become a new function) */
383 nvectors = d.nvectors; 423 nvectors = d.nvectors;
384 if(sequence_start > nvectors - sequence_length) { 424 /* FIXME: check the overflow logic here */
425 if(sequence_start + sequence_length > nvectors) {
385 /* is there something to free? goto error */ 426 /* is there something to free? goto error */
386 return 1; 427 return 1;
387 } 428 }
388 429
389 qpointers->nvectors = nvectors; 430 qpointers->nvectors = nvectors;
419 qpointers->l2norm = qpointers->l2norm_data; 460 qpointers->l2norm = qpointers->l2norm_data;
420 qpointers->power = qpointers->power_data; 461 qpointers->power = qpointers->power_data;
421 } else { 462 } else {
422 *vector = *vector_data + spec->qid.sequence_start * d.dim; 463 *vector = *vector_data + spec->qid.sequence_start * d.dim;
423 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; 464 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
424 qpointers->power = qpointers->power_data + spec->qid.sequence_start; 465 if(d.power) {
425 } 466 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
426 467 }
468 /* FIXME: this is a little bit ugly. No, a lot ugly. But at the
469 * moment this is how query_loop() knows when to stop, so for
470 * now... */
471 qpointers->nvectors = sequence_length;
472 }
427 473
428 /* Clean up: free any bits of datum that we have ourselves 474 /* Clean up: free any bits of datum that we have ourselves
429 * allocated. */ 475 * allocated. */
430 if(datum != &d) { 476 if(datum != &d) {
431 audiodb_free_datum(&d); 477 audiodb_free_datum(&d);
432 } 478 }
433 } 479
434 480 return 0;
435 // These names deserve some unpicking. The names starting with a "q" 481 }
436 // are pointers to the query, norm and power vectors; the names
437 // starting with "v" are things that will end up pointing to the
438 // actual query point's information. -- CSR, 2007-12-05
439 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) {
440 uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
441 qpointers->nvectors = nvectors;
442
443 uint32_t sequence_length = spec->qid.sequence_length;
444 if(!(dbH->flags & O2_FLAG_L2NORM)) {
445 error("Database must be L2 normed for sequence query","use -L2NORM");
446 }
447
448 if(nvectors < sequence_length) {
449 error("Query shorter than requested sequence length", "maybe use -l");
450 }
451
452 VERB_LOG(1, "performing norms... ");
453
454 *qp = new double[nvectors * dbH->dim];
455 memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double));
456 qpointers->l2norm_data = new double[nvectors];
457 audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data);
458
459 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
460 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
461
462 if (usingPower) {
463 qpointers->power_data = new double[nvectors];
464 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
465 error("error seeking to data", powerFileName, "lseek");
466 }
467 int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double));
468 if (count == -1) {
469 error("error reading data", powerFileName, "read");
470 }
471 if ((unsigned) count != nvectors * sizeof(double)) {
472 error("short read", powerFileName);
473 }
474
475 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
476 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
477 }
478
479 if (usingTimes) {
480 unsigned int k;
481 qpointers->mean_duration = new double[1];
482 *qpointers->mean_duration = 0.0;
483 double *querydurs = new double[nvectors];
484 double *timesdata = new double[2*nvectors];
485 insertTimeStamps(nvectors, timesFile, timesdata);
486 for(k = 0; k < nvectors; k++) {
487 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
488 *qpointers->mean_duration += querydurs[k];
489 }
490 *qpointers->mean_duration /= k;
491
492 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
493
494 delete [] querydurs;
495 delete [] timesdata;
496 }
497
498 // Defaults, for exhaustive search (!usingQueryPoint)
499 *vqp = *qp;
500 qpointers->l2norm = qpointers->l2norm_data;
501 qpointers->power = qpointers->power_data;
502
503 if(usingQueryPoint) {
504 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
505 error("queryPoint >= nvectors-sequence_length+1 in query");
506 } else {
507 VERB_LOG(1, "query point: %u\n", queryPoint);
508 *vqp = *qp + queryPoint * dbH->dim;
509 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
510 if (usingPower) {
511 qpointers->power = qpointers->power_data + queryPoint;
512 }
513 qpointers->nvectors = sequence_length;
514 }
515 }
516 }
517
518 // Does the same as set_up_query(...) but from database features instead of from a file
519 // Constructs the same outputs as set_up_query
520 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) {
521 uint32_t sequence_length = spec->qid.sequence_length;
522 if(!trackTable)
523 error("trackTable not initialized","set_up_query_from_key");
524
525 if(!(dbH->flags & O2_FLAG_L2NORM)) {
526 error("Database must be L2 normed for sequence query","use -L2NORM");
527 }
528
529 if(dbH->flags & O2_FLAG_POWER)
530 usingPower = true;
531
532 if(dbH->flags & O2_FLAG_TIMES)
533 usingTimes = true;
534
535 uint32_t nvectors = trackTable[queryIndex];
536 qpointers->nvectors = nvectors;
537 if(nvectors < sequence_length) {
538 error("Query shorter than requested sequence length", "maybe use -l");
539 }
540
541 VERB_LOG(1, "performing norms... ");
542
543 // For LARGE_ADB load query features from file
544 if( dbH->flags & O2_FLAG_LARGE_ADB ){
545 if(infid>0)
546 close(infid);
547 char* prefixedString = new char[O2_MAXFILESTR];
548 char* tmpStr = prefixedString;
549 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
550 prefix_name(&prefixedString, adb_feature_root);
551 if(tmpStr!=prefixedString)
552 delete[] tmpStr;
553 initInputFile(prefixedString, false); // nommap, file pointer at correct position
554 size_t allocatedSize = 0;
555 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize))
556 error("failed to read data"); // over-writes qp and allocatedSize
557 // Consistency check on allocated memory and query feature size
558 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
559 error("Query memory allocation failed consitency check","set_up_query_from_key");
560 // Allocated and calculate auxillary sequences: l2norm and power
561 init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power);
562 }
563 else{ // Load from self-contained ADB database
564 // Read query feature vectors from database
565 *qp = NULL;
566 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
567 size_t allocatedSize = 0;
568 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize))
569 error("failed to read data");
570 // Consistency check on allocated memory and query feature size
571 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
572 error("Query memory allocation failed consitency check","set_up_query_from_key");
573
574 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
575 // Copy L2 norm partial-sum coefficients
576 assert(qpointers->l2norm_data = new double[nvectors]);
577 memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double));
578 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
579 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
580
581 if( usingPower ){
582 // Copy Power partial-sum coefficients
583 assert(qpointers->power_data = new double[nvectors]);
584 memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double));
585 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
586 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
587 }
588
589 if (usingTimes) {
590 unsigned int k;
591 qpointers->mean_duration = new double[1];
592 *qpointers->mean_duration = 0.0;
593 double *querydurs = new double[nvectors];
594 double *timesdata = new double[nvectors*2];
595 assert(querydurs && timesdata);
596 memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double));
597 for(k = 0; k < nvectors; k++) {
598 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
599 *qpointers->mean_duration += querydurs[k];
600 }
601 *qpointers->mean_duration /= k;
602
603 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
604
605 delete [] querydurs;
606 delete [] timesdata;
607 }
608 }
609
610 // Defaults, for exhaustive search (!usingQueryPoint)
611 *vqp = *qp;
612 qpointers->l2norm = qpointers->l2norm_data;
613 qpointers->power = qpointers->power_data;
614
615 if(usingQueryPoint) {
616 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
617 error("queryPoint >= nvectors-sequence_length+1 in query");
618 } else {
619 VERB_LOG(1, "query point: %u\n", queryPoint);
620 *vqp = *qp + queryPoint * dbH->dim;
621 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
622 if (usingPower) {
623 qpointers->power = qpointers->power_data + queryPoint;
624 }
625 qpointers->nvectors = sequence_length;
626 }
627 }
628 }
629
630 482
631 // FIXME: this is not the right name; we're not actually setting up 483 // FIXME: this is not the right name; we're not actually setting up
632 // the database, but copying various bits of it out of mmap()ed tables 484 // the database, but copying various bits of it out of mmap()ed tables
633 // in order to reduce seeks. 485 // in order to reduce seeks.
634 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { 486 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
852 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); 704 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
853 705
854 if( dbH->flags & O2_FLAG_LARGE_ADB ) 706 if( dbH->flags & O2_FLAG_LARGE_ADB )
855 error("error: LARGE_ADB requires indexed query"); 707 error("error: LARGE_ADB requires indexed query");
856 708
857 if(query_from_key) 709 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
858 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); 710 error("failed to set up qpointers");
859 else 711 }
860 set_up_query(spec, &query_data, &query, &qpointers);
861 712
862 if(audiodb_set_up_db(adb, spec, &dbpointers)) { 713 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
863 error("failed to set up db"); 714 error("failed to set up db");
864 } 715 }
865 716