Mercurial > hg > audiodb
comparison query.cpp @ 444:4fe90fd568fc api-inversion
No more audioDB::set_up_query{,_from_key}
Go through audiodb_query_spec_qpointers() instead. It's a little bit
horrible, but less horrible than two almost-identical separate
functions...
author | mas01cr |
---|---|
date | Wed, 24 Dec 2008 10:56:37 +0000 |
parents | cb44e57a96fa |
children | d1771f436ff7 |
comparison legend: equal | deleted | inserted | replaced
443:cb44e57a96fa | 444:4fe90fd568fc |
---|---|
2 #include "reporter.h" | 2 #include "reporter.h" |
3 | 3 |
4 #include "audioDB-internals.h" | 4 #include "audioDB-internals.h" |
5 #include "accumulators.h" | 5 #include "accumulators.h" |
6 | 6 |
7 static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) { | 7 bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) { |
8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { | 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { |
9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { | 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { |
10 return false; | 10 return false; |
11 } | 11 } |
12 } | 12 } |
25 initTables(dbName); | 25 initTables(dbName); |
26 else | 26 else |
27 initTables(dbName, inFile); | 27 initTables(dbName, inFile); |
28 | 28 |
29 adb_query_spec_t qspec; | 29 adb_query_spec_t qspec; |
30 adb_datum_t datum = {0}; | |
30 | 31 |
31 qspec.refine.flags = 0; | 32 qspec.refine.flags = 0; |
32 /* FIXME: trackFile / ADB_REFINE_KEYLIST */ | 33 /* FIXME: trackFile / ADB_REFINE_KEYLIST */ |
33 if(radius) { | 34 if(radius) { |
34 qspec.refine.flags |= ADB_REFINE_RADIUS; | 35 qspec.refine.flags |= ADB_REFINE_RADIUS; |
51 qspec.refine.hopsize = sequenceHop; | 52 qspec.refine.hopsize = sequenceHop; |
52 if(sequenceHop != 1) { | 53 if(sequenceHop != 1) { |
53 qspec.refine.flags |= ADB_REFINE_HOP_SIZE; | 54 qspec.refine.flags |= ADB_REFINE_HOP_SIZE; |
54 } | 55 } |
55 | 56 |
56 /* FIXME qspec.qid.datum */ | 57 if(query_from_key) { |
58 datum.key = key; | |
59 } else { | |
60 int fd; | |
61 struct stat st; | |
62 | |
63 /* FIXME: around here there are all sorts of hideous leaks. */ | |
64 fd = open(inFile, O_RDONLY); | |
65 if(fd < 0) { | |
66 error("failed to open feature file", inFile); | |
67 } | |
68 fstat(fd, &st); | |
69 read(fd, &datum.dim, sizeof(uint32_t)); | |
70 datum.nvectors = (st.st_size - sizeof(uint32_t)) / (datum.dim * sizeof(double)); | |
71 datum.data = (double *) malloc(st.st_size - sizeof(uint32_t)); | |
72 read(fd, datum.data, st.st_size - sizeof(uint32_t)); | |
73 close(fd); | |
74 if(usingPower) { | |
75 uint32_t one; | |
76 fd = open(powerFileName, O_RDONLY); | |
77 if(fd < 0) { | |
78 error("failed to open power file", powerFileName); | |
79 } | |
80 read(fd, &one, sizeof(uint32_t)); | |
81 if(one != 1) { | |
82 error("malformed power file dimensionality", powerFileName); | |
83 } | |
84 datum.power = (double *) malloc(datum.nvectors * sizeof(double)); | |
85 if(read(fd, datum.power, datum.nvectors * sizeof(double)) != (ssize_t) (datum.nvectors * sizeof(double))) { | |
86 error("malformed power file", powerFileName); | |
87 } | |
88 close(fd); | |
89 } | |
90 if(usingTimes) { | |
91 datum.times = (double *) malloc(2 * datum.nvectors * sizeof(double)); | |
92 insertTimeStamps(datum.nvectors, timesFile, datum.times); | |
93 } | |
94 } | |
95 | |
96 qspec.qid.datum = &datum; | |
57 qspec.qid.sequence_length = sequenceLength; | 97 qspec.qid.sequence_length = sequenceLength; |
58 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE; | 98 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE; |
59 qspec.qid.sequence_start = queryPoint; | 99 qspec.qid.sequence_start = queryPoint; |
60 | 100 |
61 switch(queryType) { | 101 switch(queryType) { |
305 if (!timesFile->eof()) { | 345 if (!timesFile->eof()) { |
306 error("too many timepoints in times file", timesFileName); | 346 error("too many timepoints in times file", timesFileName); |
307 } | 347 } |
308 } | 348 } |
309 | 349 |
310 static int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { | 350 int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { |
311 adb_datum_t *datum; | 351 adb_datum_t *datum; |
312 adb_datum_t d = {0}; | 352 adb_datum_t d = {0}; |
313 uint32_t nvectors; | 353 uint32_t nvectors; |
314 uint32_t sequence_length; | 354 uint32_t sequence_length; |
315 uint32_t sequence_start; | 355 uint32_t sequence_start; |
379 | 419 |
380 /* Now we have a full(ish) datum, compute all the qpointery stuff | 420 /* Now we have a full(ish) datum, compute all the qpointery stuff |
381 that we care about (l2norm/power/mean duration). (This bit could | 421 that we care about (l2norm/power/mean duration). (This bit could |
382 conceivably become a new function) */ | 422 conceivably become a new function) */ |
383 nvectors = d.nvectors; | 423 nvectors = d.nvectors; |
384 if(sequence_start > nvectors - sequence_length) { | 424 /* FIXME: check the overflow logic here */ |
425 if(sequence_start + sequence_length > nvectors) { | |
385 /* is there something to free? goto error */ | 426 /* is there something to free? goto error */ |
386 return 1; | 427 return 1; |
387 } | 428 } |
388 | 429 |
389 qpointers->nvectors = nvectors; | 430 qpointers->nvectors = nvectors; |
419 qpointers->l2norm = qpointers->l2norm_data; | 460 qpointers->l2norm = qpointers->l2norm_data; |
420 qpointers->power = qpointers->power_data; | 461 qpointers->power = qpointers->power_data; |
421 } else { | 462 } else { |
422 *vector = *vector_data + spec->qid.sequence_start * d.dim; | 463 *vector = *vector_data + spec->qid.sequence_start * d.dim; |
423 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; | 464 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; |
424 qpointers->power = qpointers->power_data + spec->qid.sequence_start; | 465 if(d.power) { |
425 } | 466 qpointers->power = qpointers->power_data + spec->qid.sequence_start; |
426 | 467 } |
468 /* FIXME: this is a little bit ugly. No, a lot ugly. But at the | |
469 * moment this is how query_loop() knows when to stop, so for | |
470 * now... */ | |
471 qpointers->nvectors = sequence_length; | |
472 } | |
427 | 473 |
428 /* Clean up: free any bits of datum that we have ourselves | 474 /* Clean up: free any bits of datum that we have ourselves |
429 * allocated. */ | 475 * allocated. */ |
430 if(datum != &d) { | 476 if(datum != &d) { |
431 audiodb_free_datum(&d); | 477 audiodb_free_datum(&d); |
432 } | 478 } |
433 } | 479 |
434 | 480 return 0; |
435 // These names deserve some unpicking. The names starting with a "q" | 481 } |
436 // are pointers to the query, norm and power vectors; the names | |
437 // starting with "v" are things that will end up pointing to the | |
438 // actual query point's information. -- CSR, 2007-12-05 | |
439 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) { | |
440 uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); | |
441 qpointers->nvectors = nvectors; | |
442 | |
443 uint32_t sequence_length = spec->qid.sequence_length; | |
444 if(!(dbH->flags & O2_FLAG_L2NORM)) { | |
445 error("Database must be L2 normed for sequence query","use -L2NORM"); | |
446 } | |
447 | |
448 if(nvectors < sequence_length) { | |
449 error("Query shorter than requested sequence length", "maybe use -l"); | |
450 } | |
451 | |
452 VERB_LOG(1, "performing norms... "); | |
453 | |
454 *qp = new double[nvectors * dbH->dim]; | |
455 memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double)); | |
456 qpointers->l2norm_data = new double[nvectors]; | |
457 audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data); | |
458 | |
459 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); | |
460 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); | |
461 | |
462 if (usingPower) { | |
463 qpointers->power_data = new double[nvectors]; | |
464 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | |
465 error("error seeking to data", powerFileName, "lseek"); | |
466 } | |
467 int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double)); | |
468 if (count == -1) { | |
469 error("error reading data", powerFileName, "read"); | |
470 } | |
471 if ((unsigned) count != nvectors * sizeof(double)) { | |
472 error("short read", powerFileName); | |
473 } | |
474 | |
475 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); | |
476 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); | |
477 } | |
478 | |
479 if (usingTimes) { | |
480 unsigned int k; | |
481 qpointers->mean_duration = new double[1]; | |
482 *qpointers->mean_duration = 0.0; | |
483 double *querydurs = new double[nvectors]; | |
484 double *timesdata = new double[2*nvectors]; | |
485 insertTimeStamps(nvectors, timesFile, timesdata); | |
486 for(k = 0; k < nvectors; k++) { | |
487 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
488 *qpointers->mean_duration += querydurs[k]; | |
489 } | |
490 *qpointers->mean_duration /= k; | |
491 | |
492 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); | |
493 | |
494 delete [] querydurs; | |
495 delete [] timesdata; | |
496 } | |
497 | |
498 // Defaults, for exhaustive search (!usingQueryPoint) | |
499 *vqp = *qp; | |
500 qpointers->l2norm = qpointers->l2norm_data; | |
501 qpointers->power = qpointers->power_data; | |
502 | |
503 if(usingQueryPoint) { | |
504 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { | |
505 error("queryPoint >= nvectors-sequence_length+1 in query"); | |
506 } else { | |
507 VERB_LOG(1, "query point: %u\n", queryPoint); | |
508 *vqp = *qp + queryPoint * dbH->dim; | |
509 qpointers->l2norm = qpointers->l2norm_data + queryPoint; | |
510 if (usingPower) { | |
511 qpointers->power = qpointers->power_data + queryPoint; | |
512 } | |
513 qpointers->nvectors = sequence_length; | |
514 } | |
515 } | |
516 } | |
517 | |
518 // Does the same as set_up_query(...) but from database features instead of from a file | |
519 // Constructs the same outputs as set_up_query | |
520 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) { | |
521 uint32_t sequence_length = spec->qid.sequence_length; | |
522 if(!trackTable) | |
523 error("trackTable not initialized","set_up_query_from_key"); | |
524 | |
525 if(!(dbH->flags & O2_FLAG_L2NORM)) { | |
526 error("Database must be L2 normed for sequence query","use -L2NORM"); | |
527 } | |
528 | |
529 if(dbH->flags & O2_FLAG_POWER) | |
530 usingPower = true; | |
531 | |
532 if(dbH->flags & O2_FLAG_TIMES) | |
533 usingTimes = true; | |
534 | |
535 uint32_t nvectors = trackTable[queryIndex]; | |
536 qpointers->nvectors = nvectors; | |
537 if(nvectors < sequence_length) { | |
538 error("Query shorter than requested sequence length", "maybe use -l"); | |
539 } | |
540 | |
541 VERB_LOG(1, "performing norms... "); | |
542 | |
543 // For LARGE_ADB load query features from file | |
544 if( dbH->flags & O2_FLAG_LARGE_ADB ){ | |
545 if(infid>0) | |
546 close(infid); | |
547 char* prefixedString = new char[O2_MAXFILESTR]; | |
548 char* tmpStr = prefixedString; | |
549 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); | |
550 prefix_name(&prefixedString, adb_feature_root); | |
551 if(tmpStr!=prefixedString) | |
552 delete[] tmpStr; | |
553 initInputFile(prefixedString, false); // nommap, file pointer at correct position | |
554 size_t allocatedSize = 0; | |
555 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize)) | |
556 error("failed to read data"); // over-writes qp and allocatedSize | |
557 // Consistency check on allocated memory and query feature size | |
558 if(nvectors*sizeof(double)*dbH->dim != allocatedSize) | |
559 error("Query memory allocation failed consitency check","set_up_query_from_key"); | |
560 // Allocated and calculate auxillary sequences: l2norm and power | |
561 init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power); | |
562 } | |
563 else{ // Load from self-contained ADB database | |
564 // Read query feature vectors from database | |
565 *qp = NULL; | |
566 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); | |
567 size_t allocatedSize = 0; | |
568 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize)) | |
569 error("failed to read data"); | |
570 // Consistency check on allocated memory and query feature size | |
571 if(nvectors*sizeof(double)*dbH->dim != allocatedSize) | |
572 error("Query memory allocation failed consitency check","set_up_query_from_key"); | |
573 | |
574 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors | |
575 // Copy L2 norm partial-sum coefficients | |
576 assert(qpointers->l2norm_data = new double[nvectors]); | |
577 memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double)); | |
578 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); | |
579 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); | |
580 | |
581 if( usingPower ){ | |
582 // Copy Power partial-sum coefficients | |
583 assert(qpointers->power_data = new double[nvectors]); | |
584 memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double)); | |
585 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); | |
586 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); | |
587 } | |
588 | |
589 if (usingTimes) { | |
590 unsigned int k; | |
591 qpointers->mean_duration = new double[1]; | |
592 *qpointers->mean_duration = 0.0; | |
593 double *querydurs = new double[nvectors]; | |
594 double *timesdata = new double[nvectors*2]; | |
595 assert(querydurs && timesdata); | |
596 memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double)); | |
597 for(k = 0; k < nvectors; k++) { | |
598 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
599 *qpointers->mean_duration += querydurs[k]; | |
600 } | |
601 *qpointers->mean_duration /= k; | |
602 | |
603 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration); | |
604 | |
605 delete [] querydurs; | |
606 delete [] timesdata; | |
607 } | |
608 } | |
609 | |
610 // Defaults, for exhaustive search (!usingQueryPoint) | |
611 *vqp = *qp; | |
612 qpointers->l2norm = qpointers->l2norm_data; | |
613 qpointers->power = qpointers->power_data; | |
614 | |
615 if(usingQueryPoint) { | |
616 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) { | |
617 error("queryPoint >= nvectors-sequence_length+1 in query"); | |
618 } else { | |
619 VERB_LOG(1, "query point: %u\n", queryPoint); | |
620 *vqp = *qp + queryPoint * dbH->dim; | |
621 qpointers->l2norm = qpointers->l2norm_data + queryPoint; | |
622 if (usingPower) { | |
623 qpointers->power = qpointers->power_data + queryPoint; | |
624 } | |
625 qpointers->nvectors = sequence_length; | |
626 } | |
627 } | |
628 } | |
629 | |
630 | 482 |
631 // FIXME: this is not the right name; we're not actually setting up | 483 // FIXME: this is not the right name; we're not actually setting up |
632 // the database, but copying various bits of it out of mmap()ed tables | 484 // the database, but copying various bits of it out of mmap()ed tables |
633 // in order to reduce seeks. | 485 // in order to reduce seeks. |
634 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { | 486 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { |
852 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | 704 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
853 | 705 |
854 if( dbH->flags & O2_FLAG_LARGE_ADB ) | 706 if( dbH->flags & O2_FLAG_LARGE_ADB ) |
855 error("error: LARGE_ADB requires indexed query"); | 707 error("error: LARGE_ADB requires indexed query"); |
856 | 708 |
857 if(query_from_key) | 709 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { |
858 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex); | 710 error("failed to set up qpointers"); |
859 else | 711 } |
860 set_up_query(spec, &query_data, &query, &qpointers); | |
861 | 712 |
862 if(audiodb_set_up_db(adb, spec, &dbpointers)) { | 713 if(audiodb_set_up_db(adb, spec, &dbpointers)) { |
863 error("failed to set up db"); | 714 error("failed to set up db"); |
864 } | 715 } |
865 | 716 |