mas01cr@693: extern "C" { mas01cr@693: #include "audioDB_API.h" mas01cr@693: } mas01cr@693: #include "audioDB-internals.h" mas01cr@693: #include "accumulators.h" mas01cr@280: mas01cr@693: /* 0. if the datum in the spec is sufficiently NULL, do db x db mas01cr@693: sampling; mas01cr@280: mas01cr@693: 1. if accumulation is DATABASE, do basically the current sampling mas01cr@693: with track1 being the datum; mas01cr@693: mas01cr@693: 2. if accumulation is PER_TRACK, sample n points per db track mas01cr@693: rather than n points overall. mas01cr@693: mas01cr@693: 3. if accumulation is ONE_TO_ONE, ponder hard. mas01cr@693: */ mas01cr@693: adb_query_results_t *audiodb_sample_spec(adb_t *adb, const adb_query_spec_t *qspec) { mas01cr@693: adb_qstate_internal_t qstate = {0}; mas01cr@693: /* FIXME: this paragraph is the same as in audiodb_query_spec() */ mas01cr@693: qstate.allowed_keys = new std::set; mas01cr@693: adb_query_results_t *results; mas01cr@693: if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) { mas01cr@693: for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) { mas01cr@693: qstate.allowed_keys->insert(qspec->refine.include.keys[k]); mas01cr@693: } mas01cr@693: } else { mas01cr@693: for(unsigned int k = 0; k < adb->header->numFiles; k++) { mas01cr@693: qstate.allowed_keys->insert((*adb->keys)[k]); mas01cr@693: } mas01cr@693: } mas01cr@693: if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) { mas01cr@693: for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) { mas01cr@693: qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]); mas01cr@280: } mas01cr@280: } mas01cr@280: mas01cr@695: /* FIXME: this paragraph is the same as in audiodb_query_spec(). */ mas01cr@695: switch(qspec->params.distance) { mas01cr@695: case ADB_DISTANCE_DOT_PRODUCT: mas01cr@695: switch(qspec->params.accumulation) { mas01cr@695: case ADB_ACCUMULATION_DB: mas01cr@693: qstate.accumulator = new DBAccumulator(qspec->params.npoints); mas01cr@693: break; mas01cr@695: case ADB_ACCUMULATION_PER_TRACK: mas01cr@695: qstate.accumulator = new PerTrackAccumulator(qspec->params.npoints, qspec->params.ntracks); mas01cr@695: break; mas01cr@695: case ADB_ACCUMULATION_ONE_TO_ONE: mas01cr@695: qstate.accumulator = new NearestAccumulator(); mas01cr@693: break; mas01cr@693: default: mas01cr@693: goto error; mas01cr@693: } mas01cr@695: break; mas01cr@695: case ADB_DISTANCE_EUCLIDEAN_NORMED: mas01cr@695: case ADB_DISTANCE_EUCLIDEAN: mas01cr@695: switch(qspec->params.accumulation) { mas01cr@695: case ADB_ACCUMULATION_DB: mas01cr@695: qstate.accumulator = new DBAccumulator(qspec->params.npoints); mas01cr@693: break; mas01cr@695: case ADB_ACCUMULATION_PER_TRACK: mas01cr@695: qstate.accumulator = new PerTrackAccumulator(qspec->params.npoints, qspec->params.ntracks); mas01cr@695: break; mas01cr@695: case ADB_ACCUMULATION_ONE_TO_ONE: mas01cr@695: qstate.accumulator = new NearestAccumulator(); mas01cr@695: break; mas01cr@693: default: mas01cr@693: goto error; mas01cr@693: } mas01cr@695: break; mas01cr@695: default: mas01cr@695: goto error; mas01cr@693: } mas01cr@693: mas01cr@693: if(audiodb_sample_loop(adb, qspec, &qstate)) { mas01cr@693: goto error; mas01cr@693: } mas01cr@693: mas01cr@693: results = qstate.accumulator->get_points(); mas01cr@693: mas01cr@693: delete qstate.accumulator; mas01cr@693: delete qstate.allowed_keys; mas01cr@693: mas01cr@693: return results; mas01cr@693: mas01cr@693: error: mas01cr@693: if(qstate.accumulator) mas01cr@693: delete qstate.accumulator; mas01cr@693: if(qstate.allowed_keys) mas01cr@693: delete qstate.allowed_keys; mas01cr@693: return NULL; mas01cr@280: } mas01cr@280: mas01cr@693: uint32_t audiodb_random_track(adb_t *adb, uint32_t *propTable, unsigned total) { mas01cr@280: /* FIXME: make this O(1) by using the alias-rejection method, or mas01cr@280: some other sensible method of sampling from a discrete mas01cr@280: distribution. */ mas01cr@693: double thing = gsl_rng_uniform(adb->rng); mas01cr@280: unsigned sofar = 0; mas01cr@693: for (unsigned int i = 0; i < adb->header->numFiles; i++) { mas01cr@280: sofar += propTable[i]; mas01cr@280: if (thing < ((double) sofar / (double) total)) { mas01cr@280: return i; mas01cr@280: } mas01cr@280: } mas01cr@693: /* can't happen */ mas01cr@765: printf("sofar: %d; total: %d; thing: %f\n", sofar, total, thing); mas01cr@693: return (uint32_t) -1; mas01cr@280: } mas01cr@280: mas01cr@693: int audiodb_sample_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { mas01cr@693: /* FIXME: all eerily reminiscent of audiodb_query_loop() */ mas01cr@693: double *query, *query_data; mas01cr@693: adb_qpointers_internal_t qpointers = {0}, dbpointers = {0}; mas01cr@693: mas01cr@693: if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@693: /* FIXME: someday */ mas01cr@693: return 1; mas01cr@280: } mas01cr@280: mas01cr@693: if(spec->qid.datum) { mas01cr@693: if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { mas01cr@693: return 1; mas01cr@693: } mas01cr@280: } mas01cr@280: mas01cr@693: /* mas01cr@693: if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { mas01cr@693: return 1; mas01cr@693: } mas01cr@693: */ mas01cr@693: mas01cr@693: /* three cases: mas01cr@693: mas01cr@693: 1. datum is null: Select two random tracks and two mas01cr@693: offsets into those tracks, compute distance, and add to mas01cr@693: accumulator. mas01cr@693: mas01cr@693: 2. datum is not null and mas01cr@693: mas01cr@693: (a) ADB_QID_FLAG_EXHAUSTIVE is set: select one random track mas01cr@693: and offset, and one offset into datum, compute distance, add mas01cr@693: to accumulator. mas01cr@693: mas01cr@693: (b) ADB_QID_FLAG_EXHAUSTIVE is not set: select one random mas01cr@693: track and offset, compute distance with datum, add to mas01cr@693: accumulator. mas01cr@693: mas01cr@693: That all sounds simple, right? Oh, but we also need to take care mas01cr@693: of refinement: not just hop sizes, but power thresholding, track mas01cr@693: inclusion, duration ratio (theoretically), radius thresholding mas01cr@693: (no, that doesn't make sense). mas01cr@693: mas01cr@693: Also, for ADB_ACCUMULATE_PER_TRACK, we need to do effectively mas01cr@693: case 2 for each of the allowed keys. mas01cr@693: mas01cr@693: */ mas01cr@693: mas01cr@693: if(spec->refine.flags & ADB_REFINE_RADIUS) { mas01cr@693: /* doesn't make sense */ mas01cr@693: return 1; mas01cr@280: } mas01cr@280: mas01cr@693: if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) { mas01cr@693: /* does make sense in principle but my brain is too small today */ mas01cr@693: return -1; mas01cr@693: } mas01cr@280: mas01cr@693: if(spec->params.accumulation != ADB_ACCUMULATION_DB) { mas01cr@693: /* likewise, brain too small */ mas01cr@693: return -1; mas01cr@693: } mas01cr@280: mas01cr@693: uint32_t qhop, ihop; mas01cr@693: if(spec->refine.flags & ADB_REFINE_HOP_SIZE) { mas01cr@693: qhop = spec->refine.qhopsize; mas01cr@693: qhop = qhop ? qhop : 1; mas01cr@693: ihop = spec->refine.ihopsize; mas01cr@693: ihop = ihop ? ihop : 1; mas01cr@693: } else { mas01cr@693: qhop = 1; mas01cr@693: ihop = 1; mas01cr@693: } mas01cr@693: mas01cr@693: uint32_t total = 0; mas01cr@693: uint32_t *props = new uint32_t[adb->header->numFiles]; mas01cr@693: mas01cr@693: std::set::iterator keys_end = qstate->allowed_keys->end(); mas01cr@693: for(uint32_t i = 0; i < adb->header->numFiles; i++) { mas01cr@693: if(qstate->allowed_keys->find((*adb->keys)[i]) == keys_end) { mas01cr@765: props[i] = 0; mas01cr@693: } else { mas01cr@766: int32_t add = (*adb->track_lengths)[i] - spec->qid.sequence_length + 1; mas01cr@765: props[i] = add > 0 ? add : 0; mas01cr@765: total += props[i]; mas01cr@659: } mas01cr@659: } mas01cr@693: mas01cr@693: double *v1 = NULL; mas01cr@693: double *v2 = new double[adb->header->dim * spec->qid.sequence_length]; mas01cr@693: mas01cr@693: if(!spec->qid.datum) { mas01cr@693: v1 = new double[adb->header->dim * spec->qid.sequence_length]; mas01cr@693: } mas01cr@693: mas01cr@693: uint32_t i1, i2, track1 = 0, track2; mas01cr@693: mas01cr@693: uint32_t npoints = spec->params.npoints; mas01cr@693: for(uint32_t nsamples = 0; nsamples < npoints;) { mas01cr@693: mas01cr@693: if(spec->qid.datum) { mas01cr@693: if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) { mas01cr@693: i1 = qhop * gsl_rng_uniform_int(adb->rng, (qpointers.nvectors - spec->qid.sequence_length + qhop - 1) / qhop); mas01cr@693: v1 = query + i1 * adb->header->dim; mas01cr@693: } else { mas01cr@693: i1 = spec->qid.sequence_start; mas01cr@693: v1 = query; mas01cr@693: } mas01cr@693: track2 = audiodb_random_track(adb, props, total); mas01cr@693: i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop); mas01cr@693: off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double); mas01cr@693: uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double); mas01cr@693: lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2); mas01cr@693: read_or_goto_error(adb->fd, v2, length); mas01cr@659: } else { mas01cr@693: /* FIXME: Theoretically we should pass ihop here, but that's a mas01cr@693: small correction that I'm ignoring for now */ mas01cr@693: track1 = audiodb_random_track(adb, props, total); mas01cr@693: track2 = audiodb_random_track(adb, props, total); mas01cr@693: if(track1 == track2) { mas01cr@693: continue; mas01cr@693: } mas01cr@693: mas01cr@693: i1 = ihop * gsl_rng_uniform_int(adb->rng, (props[track1] + ihop - 1) / ihop); mas01cr@693: i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop); mas01cr@693: mas01cr@693: off_t offset1 = (*adb->track_offsets)[track1] + i1 * adb->header->dim * sizeof(double); mas01cr@693: off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double); mas01cr@693: uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double); mas01cr@693: lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset1); mas01cr@693: read_or_goto_error(adb->fd, v1, length); mas01cr@693: lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2); mas01cr@693: read_or_goto_error(adb->fd, v2, length); mas01cr@659: } mas01cr@280: mas01cr@693: /* FIXME: replace at least the norms with dbpointers */ mas01cr@693: double v1norm = 0; mas01cr@693: double v2norm = 0; mas01cr@693: double v1v2 = 0; mas01cr@693: double dist = 0; mas01cr@280: mas01cr@693: uint32_t vlen = spec->qid.sequence_length * adb->header->dim; mas01cr@280: for (unsigned int j = 0; j < vlen; j++) { mas01cr@280: v1norm += v1[j]*v1[j]; mas01cr@280: v2norm += v2[j]*v2[j]; mas01cr@280: v1v2 += v1[j]*v2[j]; mas01cr@280: } mas01cr@280: mas01cr@693: /* FIXME: do power thresholding test */ mas01cr@280: if(isfinite(v1norm) && isfinite(v2norm) && isfinite(v1v2)) { mas01cr@693: switch(spec->params.distance) { mas01cr@693: case ADB_DISTANCE_EUCLIDEAN_NORMED: mas01cr@693: dist = 2 - 2 * v1v2 / sqrt(v1norm * v2norm); mas01cr@693: break; mas01cr@693: case ADB_DISTANCE_EUCLIDEAN: mas01cr@693: dist = v1norm + v2norm - 2 * v1v2; mas01cr@693: break; mas01cr@693: case ADB_DISTANCE_DOT_PRODUCT: mas01cr@693: dist = v1v2; mas01cr@693: break; mas01cr@693: } mas01cr@693: } else { mas01cr@693: continue; mas01cr@693: } mas01cr@280: mas01cr@693: adb_result_t r; mas01cr@693: r.qkey = spec->qid.datum ? "" : (*adb->keys)[track1].c_str(); mas01cr@693: r.ikey = (*adb->keys)[track2].c_str(); mas01cr@693: r.dist = dist; mas01cr@693: r.qpos = i1; mas01cr@693: r.ipos = i2; mas01cr@693: qstate->accumulator->add_point(&r); mas01cr@693: nsamples++; mas01cr@280: } mas01cr@280: mas01cr@693: if(!spec->qid.datum) { mas01cr@693: delete[] v1; mas01cr@693: } mas01cr@693: delete[] v2; mas01cr@280: mas01cr@693: return 0; mas01cr@280: mas01cr@693: error: mas01cr@693: if(!spec->qid.datum) { mas01cr@693: delete[] v1; mas01cr@693: } mas01cr@280: delete[] v2; mas01cr@693: return 17; mas01cr@280: }