annotate sample.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents 6a5117d68ac4
children
rev   line source
mas01cr@693 1 extern "C" {
mas01cr@693 2 #include "audioDB_API.h"
mas01cr@693 3 }
mas01cr@693 4 #include "audioDB-internals.h"
mas01cr@693 5 #include "accumulators.h"
mas01cr@280 6
mas01cr@693 7 /* 0. if the datum in the spec is sufficiently NULL, do db x db
mas01cr@693 8 sampling;
mas01cr@280 9
mas01cr@693 10 1. if accumulation is DATABASE, do basically the current sampling
mas01cr@693 11 with track1 being the datum;
mas01cr@693 12
mas01cr@693 13 2. if accumulation is PER_TRACK, sample n points per db track
mas01cr@693 14 rather than n points overall.
mas01cr@693 15
mas01cr@693 16 3. if accumulation is ONE_TO_ONE, ponder hard.
mas01cr@693 17 */
mas01cr@693 18 adb_query_results_t *audiodb_sample_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@693 19 adb_qstate_internal_t qstate = {0};
mas01cr@693 20 /* FIXME: this paragraph is the same as in audiodb_query_spec() */
mas01cr@693 21 qstate.allowed_keys = new std::set<std::string>;
mas01cr@693 22 adb_query_results_t *results;
mas01cr@693 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@693 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@693 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@693 26 }
mas01cr@693 27 } else {
mas01cr@693 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@693 29 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@693 30 }
mas01cr@693 31 }
mas01cr@693 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@693 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@693 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@280 35 }
mas01cr@280 36 }
mas01cr@280 37
mas01cr@695 38 /* FIXME: this paragraph is the same as in audiodb_query_spec(). */
mas01cr@695 39 switch(qspec->params.distance) {
mas01cr@695 40 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@695 41 switch(qspec->params.accumulation) {
mas01cr@695 42 case ADB_ACCUMULATION_DB:
mas01cr@693 43 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@693 44 break;
mas01cr@695 45 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@695 46 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@695 47 break;
mas01cr@695 48 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@695 49 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@693 50 break;
mas01cr@693 51 default:
mas01cr@693 52 goto error;
mas01cr@693 53 }
mas01cr@695 54 break;
mas01cr@695 55 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@695 56 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@695 57 switch(qspec->params.accumulation) {
mas01cr@695 58 case ADB_ACCUMULATION_DB:
mas01cr@695 59 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@693 60 break;
mas01cr@695 61 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@695 62 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@695 63 break;
mas01cr@695 64 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@695 65 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@695 66 break;
mas01cr@693 67 default:
mas01cr@693 68 goto error;
mas01cr@693 69 }
mas01cr@695 70 break;
mas01cr@695 71 default:
mas01cr@695 72 goto error;
mas01cr@693 73 }
mas01cr@693 74
mas01cr@693 75 if(audiodb_sample_loop(adb, qspec, &qstate)) {
mas01cr@693 76 goto error;
mas01cr@693 77 }
mas01cr@693 78
mas01cr@693 79 results = qstate.accumulator->get_points();
mas01cr@693 80
mas01cr@693 81 delete qstate.accumulator;
mas01cr@693 82 delete qstate.allowed_keys;
mas01cr@693 83
mas01cr@693 84 return results;
mas01cr@693 85
mas01cr@693 86 error:
mas01cr@693 87 if(qstate.accumulator)
mas01cr@693 88 delete qstate.accumulator;
mas01cr@693 89 if(qstate.allowed_keys)
mas01cr@693 90 delete qstate.allowed_keys;
mas01cr@693 91 return NULL;
mas01cr@280 92 }
mas01cr@280 93
mas01cr@693 94 uint32_t audiodb_random_track(adb_t *adb, uint32_t *propTable, unsigned total) {
mas01cr@280 95 /* FIXME: make this O(1) by using the alias-rejection method, or
mas01cr@280 96 some other sensible method of sampling from a discrete
mas01cr@280 97 distribution. */
mas01cr@693 98 double thing = gsl_rng_uniform(adb->rng);
mas01cr@280 99 unsigned sofar = 0;
mas01cr@693 100 for (unsigned int i = 0; i < adb->header->numFiles; i++) {
mas01cr@280 101 sofar += propTable[i];
mas01cr@280 102 if (thing < ((double) sofar / (double) total)) {
mas01cr@280 103 return i;
mas01cr@280 104 }
mas01cr@280 105 }
mas01cr@693 106 /* can't happen */
mas01cr@765 107 printf("sofar: %d; total: %d; thing: %f\n", sofar, total, thing);
mas01cr@693 108 return (uint32_t) -1;
mas01cr@280 109 }
mas01cr@280 110
mas01cr@693 111 int audiodb_sample_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@693 112 /* FIXME: all eerily reminiscent of audiodb_query_loop() */
mas01cr@693 113 double *query, *query_data;
mas01cr@693 114 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@693 115
mas01cr@693 116 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@693 117 /* FIXME: someday */
mas01cr@693 118 return 1;
mas01cr@280 119 }
mas01cr@280 120
mas01cr@693 121 if(spec->qid.datum) {
mas01cr@693 122 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@693 123 return 1;
mas01cr@693 124 }
mas01cr@280 125 }
mas01cr@280 126
mas01cr@693 127 /*
mas01cr@693 128 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@693 129 return 1;
mas01cr@693 130 }
mas01cr@693 131 */
mas01cr@693 132
mas01cr@693 133 /* three cases:
mas01cr@693 134
mas01cr@693 135 1. datum is null: Select two random tracks and two
mas01cr@693 136 offsets into those tracks, compute distance, and add to
mas01cr@693 137 accumulator.
mas01cr@693 138
mas01cr@693 139 2. datum is not null and
mas01cr@693 140
mas01cr@693 141 (a) ADB_QID_FLAG_EXHAUSTIVE is set: select one random track
mas01cr@693 142 and offset, and one offset into datum, compute distance, add
mas01cr@693 143 to accumulator.
mas01cr@693 144
mas01cr@693 145 (b) ADB_QID_FLAG_EXHAUSTIVE is not set: select one random
mas01cr@693 146 track and offset, compute distance with datum, add to
mas01cr@693 147 accumulator.
mas01cr@693 148
mas01cr@693 149 That all sounds simple, right? Oh, but we also need to take care
mas01cr@693 150 of refinement: not just hop sizes, but power thresholding, track
mas01cr@693 151 inclusion, duration ratio (theoretically), radius thresholding
mas01cr@693 152 (no, that doesn't make sense).
mas01cr@693 153
mas01cr@693 154 Also, for ADB_ACCUMULATE_PER_TRACK, we need to do effectively
mas01cr@693 155 case 2 for each of the allowed keys.
mas01cr@693 156
mas01cr@693 157 */
mas01cr@693 158
mas01cr@693 159 if(spec->refine.flags & ADB_REFINE_RADIUS) {
mas01cr@693 160 /* doesn't make sense */
mas01cr@693 161 return 1;
mas01cr@280 162 }
mas01cr@280 163
mas01cr@693 164 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@693 165 /* does make sense in principle but my brain is too small today */
mas01cr@693 166 return -1;
mas01cr@693 167 }
mas01cr@280 168
mas01cr@693 169 if(spec->params.accumulation != ADB_ACCUMULATION_DB) {
mas01cr@693 170 /* likewise, brain too small */
mas01cr@693 171 return -1;
mas01cr@693 172 }
mas01cr@280 173
mas01cr@693 174 uint32_t qhop, ihop;
mas01cr@693 175 if(spec->refine.flags & ADB_REFINE_HOP_SIZE) {
mas01cr@693 176 qhop = spec->refine.qhopsize;
mas01cr@693 177 qhop = qhop ? qhop : 1;
mas01cr@693 178 ihop = spec->refine.ihopsize;
mas01cr@693 179 ihop = ihop ? ihop : 1;
mas01cr@693 180 } else {
mas01cr@693 181 qhop = 1;
mas01cr@693 182 ihop = 1;
mas01cr@693 183 }
mas01cr@693 184
mas01cr@693 185 uint32_t total = 0;
mas01cr@693 186 uint32_t *props = new uint32_t[adb->header->numFiles];
mas01cr@693 187
mas01cr@693 188 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@693 189 for(uint32_t i = 0; i < adb->header->numFiles; i++) {
mas01cr@693 190 if(qstate->allowed_keys->find((*adb->keys)[i]) == keys_end) {
mas01cr@765 191 props[i] = 0;
mas01cr@693 192 } else {
mas01cr@766 193 int32_t add = (*adb->track_lengths)[i] - spec->qid.sequence_length + 1;
mas01cr@765 194 props[i] = add > 0 ? add : 0;
mas01cr@765 195 total += props[i];
mas01cr@659 196 }
mas01cr@659 197 }
mas01cr@693 198
mas01cr@693 199 double *v1 = NULL;
mas01cr@693 200 double *v2 = new double[adb->header->dim * spec->qid.sequence_length];
mas01cr@693 201
mas01cr@693 202 if(!spec->qid.datum) {
mas01cr@693 203 v1 = new double[adb->header->dim * spec->qid.sequence_length];
mas01cr@693 204 }
mas01cr@693 205
mas01cr@693 206 uint32_t i1, i2, track1 = 0, track2;
mas01cr@693 207
mas01cr@693 208 uint32_t npoints = spec->params.npoints;
mas01cr@693 209 for(uint32_t nsamples = 0; nsamples < npoints;) {
mas01cr@693 210
mas01cr@693 211 if(spec->qid.datum) {
mas01cr@693 212 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@693 213 i1 = qhop * gsl_rng_uniform_int(adb->rng, (qpointers.nvectors - spec->qid.sequence_length + qhop - 1) / qhop);
mas01cr@693 214 v1 = query + i1 * adb->header->dim;
mas01cr@693 215 } else {
mas01cr@693 216 i1 = spec->qid.sequence_start;
mas01cr@693 217 v1 = query;
mas01cr@693 218 }
mas01cr@693 219 track2 = audiodb_random_track(adb, props, total);
mas01cr@693 220 i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop);
mas01cr@693 221 off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double);
mas01cr@693 222 uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double);
mas01cr@693 223 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2);
mas01cr@693 224 read_or_goto_error(adb->fd, v2, length);
mas01cr@659 225 } else {
mas01cr@693 226 /* FIXME: Theoretically we should pass ihop here, but that's a
mas01cr@693 227 small correction that I'm ignoring for now */
mas01cr@693 228 track1 = audiodb_random_track(adb, props, total);
mas01cr@693 229 track2 = audiodb_random_track(adb, props, total);
mas01cr@693 230 if(track1 == track2) {
mas01cr@693 231 continue;
mas01cr@693 232 }
mas01cr@693 233
mas01cr@693 234 i1 = ihop * gsl_rng_uniform_int(adb->rng, (props[track1] + ihop - 1) / ihop);
mas01cr@693 235 i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop);
mas01cr@693 236
mas01cr@693 237 off_t offset1 = (*adb->track_offsets)[track1] + i1 * adb->header->dim * sizeof(double);
mas01cr@693 238 off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double);
mas01cr@693 239 uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double);
mas01cr@693 240 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset1);
mas01cr@693 241 read_or_goto_error(adb->fd, v1, length);
mas01cr@693 242 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2);
mas01cr@693 243 read_or_goto_error(adb->fd, v2, length);
mas01cr@659 244 }
mas01cr@280 245
mas01cr@693 246 /* FIXME: replace at least the norms with dbpointers */
mas01cr@693 247 double v1norm = 0;
mas01cr@693 248 double v2norm = 0;
mas01cr@693 249 double v1v2 = 0;
mas01cr@693 250 double dist = 0;
mas01cr@280 251
mas01cr@693 252 uint32_t vlen = spec->qid.sequence_length * adb->header->dim;
mas01cr@280 253 for (unsigned int j = 0; j < vlen; j++) {
mas01cr@280 254 v1norm += v1[j]*v1[j];
mas01cr@280 255 v2norm += v2[j]*v2[j];
mas01cr@280 256 v1v2 += v1[j]*v2[j];
mas01cr@280 257 }
mas01cr@280 258
mas01cr@693 259 /* FIXME: do power thresholding test */
mas01cr@280 260 if(isfinite(v1norm) && isfinite(v2norm) && isfinite(v1v2)) {
mas01cr@693 261 switch(spec->params.distance) {
mas01cr@693 262 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@693 263 dist = 2 - 2 * v1v2 / sqrt(v1norm * v2norm);
mas01cr@693 264 break;
mas01cr@693 265 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@693 266 dist = v1norm + v2norm - 2 * v1v2;
mas01cr@693 267 break;
mas01cr@693 268 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@693 269 dist = v1v2;
mas01cr@693 270 break;
mas01cr@693 271 }
mas01cr@693 272 } else {
mas01cr@693 273 continue;
mas01cr@693 274 }
mas01cr@280 275
mas01cr@693 276 adb_result_t r;
mas01cr@693 277 r.qkey = spec->qid.datum ? "" : (*adb->keys)[track1].c_str();
mas01cr@693 278 r.ikey = (*adb->keys)[track2].c_str();
mas01cr@693 279 r.dist = dist;
mas01cr@693 280 r.qpos = i1;
mas01cr@693 281 r.ipos = i2;
mas01cr@693 282 qstate->accumulator->add_point(&r);
mas01cr@693 283 nsamples++;
mas01cr@280 284 }
mas01cr@280 285
mas01cr@693 286 if(!spec->qid.datum) {
mas01cr@693 287 delete[] v1;
mas01cr@693 288 }
mas01cr@693 289 delete[] v2;
mas01cr@280 290
mas01cr@693 291 return 0;
mas01cr@280 292
mas01cr@693 293 error:
mas01cr@693 294 if(!spec->qid.datum) {
mas01cr@693 295 delete[] v1;
mas01cr@693 296 }
mas01cr@280 297 delete[] v2;
mas01cr@693 298 return 17;
mas01cr@280 299 }