mas01cr@693
|
1 extern "C" {
|
mas01cr@693
|
2 #include "audioDB_API.h"
|
mas01cr@693
|
3 }
|
mas01cr@693
|
4 #include "audioDB-internals.h"
|
mas01cr@693
|
5 #include "accumulators.h"
|
mas01cr@280
|
6
|
mas01cr@693
|
7 /* 0. if the datum in the spec is sufficiently NULL, do db x db
|
mas01cr@693
|
8 sampling;
|
mas01cr@280
|
9
|
mas01cr@693
|
10 1. if accumulation is DATABASE, do basically the current sampling
|
mas01cr@693
|
11 with track1 being the datum;
|
mas01cr@693
|
12
|
mas01cr@693
|
13 2. if accumulation is PER_TRACK, sample n points per db track
|
mas01cr@693
|
14 rather than n points overall.
|
mas01cr@693
|
15
|
mas01cr@693
|
16 3. if accumulation is ONE_TO_ONE, ponder hard.
|
mas01cr@693
|
17 */
|
mas01cr@693
|
18 adb_query_results_t *audiodb_sample_spec(adb_t *adb, const adb_query_spec_t *qspec) {
|
mas01cr@693
|
19 adb_qstate_internal_t qstate = {0};
|
mas01cr@693
|
20 /* FIXME: this paragraph is the same as in audiodb_query_spec() */
|
mas01cr@693
|
21 qstate.allowed_keys = new std::set<std::string>;
|
mas01cr@693
|
22 adb_query_results_t *results;
|
mas01cr@693
|
23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
|
mas01cr@693
|
24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
|
mas01cr@693
|
25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
|
mas01cr@693
|
26 }
|
mas01cr@693
|
27 } else {
|
mas01cr@693
|
28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
|
mas01cr@693
|
29 qstate.allowed_keys->insert((*adb->keys)[k]);
|
mas01cr@693
|
30 }
|
mas01cr@693
|
31 }
|
mas01cr@693
|
32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
|
mas01cr@693
|
33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
|
mas01cr@693
|
34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
|
mas01cr@280
|
35 }
|
mas01cr@280
|
36 }
|
mas01cr@280
|
37
|
mas01cr@695
|
38 /* FIXME: this paragraph is the same as in audiodb_query_spec(). */
|
mas01cr@695
|
39 switch(qspec->params.distance) {
|
mas01cr@695
|
40 case ADB_DISTANCE_DOT_PRODUCT:
|
mas01cr@695
|
41 switch(qspec->params.accumulation) {
|
mas01cr@695
|
42 case ADB_ACCUMULATION_DB:
|
mas01cr@693
|
43 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
|
mas01cr@693
|
44 break;
|
mas01cr@695
|
45 case ADB_ACCUMULATION_PER_TRACK:
|
mas01cr@695
|
46 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
|
mas01cr@695
|
47 break;
|
mas01cr@695
|
48 case ADB_ACCUMULATION_ONE_TO_ONE:
|
mas01cr@695
|
49 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
|
mas01cr@693
|
50 break;
|
mas01cr@693
|
51 default:
|
mas01cr@693
|
52 goto error;
|
mas01cr@693
|
53 }
|
mas01cr@695
|
54 break;
|
mas01cr@695
|
55 case ADB_DISTANCE_EUCLIDEAN_NORMED:
|
mas01cr@695
|
56 case ADB_DISTANCE_EUCLIDEAN:
|
mas01cr@695
|
57 switch(qspec->params.accumulation) {
|
mas01cr@695
|
58 case ADB_ACCUMULATION_DB:
|
mas01cr@695
|
59 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
|
mas01cr@693
|
60 break;
|
mas01cr@695
|
61 case ADB_ACCUMULATION_PER_TRACK:
|
mas01cr@695
|
62 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
|
mas01cr@695
|
63 break;
|
mas01cr@695
|
64 case ADB_ACCUMULATION_ONE_TO_ONE:
|
mas01cr@695
|
65 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
|
mas01cr@695
|
66 break;
|
mas01cr@693
|
67 default:
|
mas01cr@693
|
68 goto error;
|
mas01cr@693
|
69 }
|
mas01cr@695
|
70 break;
|
mas01cr@695
|
71 default:
|
mas01cr@695
|
72 goto error;
|
mas01cr@693
|
73 }
|
mas01cr@693
|
74
|
mas01cr@693
|
75 if(audiodb_sample_loop(adb, qspec, &qstate)) {
|
mas01cr@693
|
76 goto error;
|
mas01cr@693
|
77 }
|
mas01cr@693
|
78
|
mas01cr@693
|
79 results = qstate.accumulator->get_points();
|
mas01cr@693
|
80
|
mas01cr@693
|
81 delete qstate.accumulator;
|
mas01cr@693
|
82 delete qstate.allowed_keys;
|
mas01cr@693
|
83
|
mas01cr@693
|
84 return results;
|
mas01cr@693
|
85
|
mas01cr@693
|
86 error:
|
mas01cr@693
|
87 if(qstate.accumulator)
|
mas01cr@693
|
88 delete qstate.accumulator;
|
mas01cr@693
|
89 if(qstate.allowed_keys)
|
mas01cr@693
|
90 delete qstate.allowed_keys;
|
mas01cr@693
|
91 return NULL;
|
mas01cr@280
|
92 }
|
mas01cr@280
|
93
|
mas01cr@693
|
94 uint32_t audiodb_random_track(adb_t *adb, uint32_t *propTable, unsigned total) {
|
mas01cr@280
|
95 /* FIXME: make this O(1) by using the alias-rejection method, or
|
mas01cr@280
|
96 some other sensible method of sampling from a discrete
|
mas01cr@280
|
97 distribution. */
|
mas01cr@693
|
98 double thing = gsl_rng_uniform(adb->rng);
|
mas01cr@280
|
99 unsigned sofar = 0;
|
mas01cr@693
|
100 for (unsigned int i = 0; i < adb->header->numFiles; i++) {
|
mas01cr@280
|
101 sofar += propTable[i];
|
mas01cr@280
|
102 if (thing < ((double) sofar / (double) total)) {
|
mas01cr@280
|
103 return i;
|
mas01cr@280
|
104 }
|
mas01cr@280
|
105 }
|
mas01cr@693
|
106 /* can't happen */
|
mas01cr@765
|
107 printf("sofar: %d; total: %d; thing: %f\n", sofar, total, thing);
|
mas01cr@693
|
108 return (uint32_t) -1;
|
mas01cr@280
|
109 }
|
mas01cr@280
|
110
|
mas01cr@693
|
111 int audiodb_sample_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
|
mas01cr@693
|
112 /* FIXME: all eerily reminiscent of audiodb_query_loop() */
|
mas01cr@693
|
113 double *query, *query_data;
|
mas01cr@693
|
114 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
|
mas01cr@693
|
115
|
mas01cr@693
|
116 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
|
mas01cr@693
|
117 /* FIXME: someday */
|
mas01cr@693
|
118 return 1;
|
mas01cr@280
|
119 }
|
mas01cr@280
|
120
|
mas01cr@693
|
121 if(spec->qid.datum) {
|
mas01cr@693
|
122 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
|
mas01cr@693
|
123 return 1;
|
mas01cr@693
|
124 }
|
mas01cr@280
|
125 }
|
mas01cr@280
|
126
|
mas01cr@693
|
127 /*
|
mas01cr@693
|
128 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
|
mas01cr@693
|
129 return 1;
|
mas01cr@693
|
130 }
|
mas01cr@693
|
131 */
|
mas01cr@693
|
132
|
mas01cr@693
|
133 /* three cases:
|
mas01cr@693
|
134
|
mas01cr@693
|
135 1. datum is null: Select two random tracks and two
|
mas01cr@693
|
136 offsets into those tracks, compute distance, and add to
|
mas01cr@693
|
137 accumulator.
|
mas01cr@693
|
138
|
mas01cr@693
|
139 2. datum is not null and
|
mas01cr@693
|
140
|
mas01cr@693
|
141 (a) ADB_QID_FLAG_EXHAUSTIVE is set: select one random track
|
mas01cr@693
|
142 and offset, and one offset into datum, compute distance, add
|
mas01cr@693
|
143 to accumulator.
|
mas01cr@693
|
144
|
mas01cr@693
|
145 (b) ADB_QID_FLAG_EXHAUSTIVE is not set: select one random
|
mas01cr@693
|
146 track and offset, compute distance with datum, add to
|
mas01cr@693
|
147 accumulator.
|
mas01cr@693
|
148
|
mas01cr@693
|
149 That all sounds simple, right? Oh, but we also need to take care
|
mas01cr@693
|
150 of refinement: not just hop sizes, but power thresholding, track
|
mas01cr@693
|
151 inclusion, duration ratio (theoretically), radius thresholding
|
mas01cr@693
|
152 (no, that doesn't make sense).
|
mas01cr@693
|
153
|
mas01cr@693
|
154 Also, for ADB_ACCUMULATE_PER_TRACK, we need to do effectively
|
mas01cr@693
|
155 case 2 for each of the allowed keys.
|
mas01cr@693
|
156
|
mas01cr@693
|
157 */
|
mas01cr@693
|
158
|
mas01cr@693
|
159 if(spec->refine.flags & ADB_REFINE_RADIUS) {
|
mas01cr@693
|
160 /* doesn't make sense */
|
mas01cr@693
|
161 return 1;
|
mas01cr@280
|
162 }
|
mas01cr@280
|
163
|
mas01cr@693
|
164 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
|
mas01cr@693
|
165 /* does make sense in principle but my brain is too small today */
|
mas01cr@693
|
166 return -1;
|
mas01cr@693
|
167 }
|
mas01cr@280
|
168
|
mas01cr@693
|
169 if(spec->params.accumulation != ADB_ACCUMULATION_DB) {
|
mas01cr@693
|
170 /* likewise, brain too small */
|
mas01cr@693
|
171 return -1;
|
mas01cr@693
|
172 }
|
mas01cr@280
|
173
|
mas01cr@693
|
174 uint32_t qhop, ihop;
|
mas01cr@693
|
175 if(spec->refine.flags & ADB_REFINE_HOP_SIZE) {
|
mas01cr@693
|
176 qhop = spec->refine.qhopsize;
|
mas01cr@693
|
177 qhop = qhop ? qhop : 1;
|
mas01cr@693
|
178 ihop = spec->refine.ihopsize;
|
mas01cr@693
|
179 ihop = ihop ? ihop : 1;
|
mas01cr@693
|
180 } else {
|
mas01cr@693
|
181 qhop = 1;
|
mas01cr@693
|
182 ihop = 1;
|
mas01cr@693
|
183 }
|
mas01cr@693
|
184
|
mas01cr@693
|
185 uint32_t total = 0;
|
mas01cr@693
|
186 uint32_t *props = new uint32_t[adb->header->numFiles];
|
mas01cr@693
|
187
|
mas01cr@693
|
188 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
|
mas01cr@693
|
189 for(uint32_t i = 0; i < adb->header->numFiles; i++) {
|
mas01cr@693
|
190 if(qstate->allowed_keys->find((*adb->keys)[i]) == keys_end) {
|
mas01cr@765
|
191 props[i] = 0;
|
mas01cr@693
|
192 } else {
|
mas01cr@766
|
193 int32_t add = (*adb->track_lengths)[i] - spec->qid.sequence_length + 1;
|
mas01cr@765
|
194 props[i] = add > 0 ? add : 0;
|
mas01cr@765
|
195 total += props[i];
|
mas01cr@659
|
196 }
|
mas01cr@659
|
197 }
|
mas01cr@693
|
198
|
mas01cr@693
|
199 double *v1 = NULL;
|
mas01cr@693
|
200 double *v2 = new double[adb->header->dim * spec->qid.sequence_length];
|
mas01cr@693
|
201
|
mas01cr@693
|
202 if(!spec->qid.datum) {
|
mas01cr@693
|
203 v1 = new double[adb->header->dim * spec->qid.sequence_length];
|
mas01cr@693
|
204 }
|
mas01cr@693
|
205
|
mas01cr@693
|
206 uint32_t i1, i2, track1 = 0, track2;
|
mas01cr@693
|
207
|
mas01cr@693
|
208 uint32_t npoints = spec->params.npoints;
|
mas01cr@693
|
209 for(uint32_t nsamples = 0; nsamples < npoints;) {
|
mas01cr@693
|
210
|
mas01cr@693
|
211 if(spec->qid.datum) {
|
mas01cr@693
|
212 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
|
mas01cr@693
|
213 i1 = qhop * gsl_rng_uniform_int(adb->rng, (qpointers.nvectors - spec->qid.sequence_length + qhop - 1) / qhop);
|
mas01cr@693
|
214 v1 = query + i1 * adb->header->dim;
|
mas01cr@693
|
215 } else {
|
mas01cr@693
|
216 i1 = spec->qid.sequence_start;
|
mas01cr@693
|
217 v1 = query;
|
mas01cr@693
|
218 }
|
mas01cr@693
|
219 track2 = audiodb_random_track(adb, props, total);
|
mas01cr@693
|
220 i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop);
|
mas01cr@693
|
221 off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double);
|
mas01cr@693
|
222 uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double);
|
mas01cr@693
|
223 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2);
|
mas01cr@693
|
224 read_or_goto_error(adb->fd, v2, length);
|
mas01cr@659
|
225 } else {
|
mas01cr@693
|
226 /* FIXME: Theoretically we should pass ihop here, but that's a
|
mas01cr@693
|
227 small correction that I'm ignoring for now */
|
mas01cr@693
|
228 track1 = audiodb_random_track(adb, props, total);
|
mas01cr@693
|
229 track2 = audiodb_random_track(adb, props, total);
|
mas01cr@693
|
230 if(track1 == track2) {
|
mas01cr@693
|
231 continue;
|
mas01cr@693
|
232 }
|
mas01cr@693
|
233
|
mas01cr@693
|
234 i1 = ihop * gsl_rng_uniform_int(adb->rng, (props[track1] + ihop - 1) / ihop);
|
mas01cr@693
|
235 i2 = ihop * gsl_rng_uniform_int(adb->rng, (props[track2] + ihop - 1) / ihop);
|
mas01cr@693
|
236
|
mas01cr@693
|
237 off_t offset1 = (*adb->track_offsets)[track1] + i1 * adb->header->dim * sizeof(double);
|
mas01cr@693
|
238 off_t offset2 = (*adb->track_offsets)[track2] + i2 * adb->header->dim * sizeof(double);
|
mas01cr@693
|
239 uint32_t length = adb->header->dim * spec->qid.sequence_length * sizeof(double);
|
mas01cr@693
|
240 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset1);
|
mas01cr@693
|
241 read_or_goto_error(adb->fd, v1, length);
|
mas01cr@693
|
242 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset2);
|
mas01cr@693
|
243 read_or_goto_error(adb->fd, v2, length);
|
mas01cr@659
|
244 }
|
mas01cr@280
|
245
|
mas01cr@693
|
246 /* FIXME: replace at least the norms with dbpointers */
|
mas01cr@693
|
247 double v1norm = 0;
|
mas01cr@693
|
248 double v2norm = 0;
|
mas01cr@693
|
249 double v1v2 = 0;
|
mas01cr@693
|
250 double dist = 0;
|
mas01cr@280
|
251
|
mas01cr@693
|
252 uint32_t vlen = spec->qid.sequence_length * adb->header->dim;
|
mas01cr@280
|
253 for (unsigned int j = 0; j < vlen; j++) {
|
mas01cr@280
|
254 v1norm += v1[j]*v1[j];
|
mas01cr@280
|
255 v2norm += v2[j]*v2[j];
|
mas01cr@280
|
256 v1v2 += v1[j]*v2[j];
|
mas01cr@280
|
257 }
|
mas01cr@280
|
258
|
mas01cr@693
|
259 /* FIXME: do power thresholding test */
|
mas01cr@280
|
260 if(isfinite(v1norm) && isfinite(v2norm) && isfinite(v1v2)) {
|
mas01cr@693
|
261 switch(spec->params.distance) {
|
mas01cr@693
|
262 case ADB_DISTANCE_EUCLIDEAN_NORMED:
|
mas01cr@693
|
263 dist = 2 - 2 * v1v2 / sqrt(v1norm * v2norm);
|
mas01cr@693
|
264 break;
|
mas01cr@693
|
265 case ADB_DISTANCE_EUCLIDEAN:
|
mas01cr@693
|
266 dist = v1norm + v2norm - 2 * v1v2;
|
mas01cr@693
|
267 break;
|
mas01cr@693
|
268 case ADB_DISTANCE_DOT_PRODUCT:
|
mas01cr@693
|
269 dist = v1v2;
|
mas01cr@693
|
270 break;
|
mas01cr@693
|
271 }
|
mas01cr@693
|
272 } else {
|
mas01cr@693
|
273 continue;
|
mas01cr@693
|
274 }
|
mas01cr@280
|
275
|
mas01cr@693
|
276 adb_result_t r;
|
mas01cr@693
|
277 r.qkey = spec->qid.datum ? "" : (*adb->keys)[track1].c_str();
|
mas01cr@693
|
278 r.ikey = (*adb->keys)[track2].c_str();
|
mas01cr@693
|
279 r.dist = dist;
|
mas01cr@693
|
280 r.qpos = i1;
|
mas01cr@693
|
281 r.ipos = i2;
|
mas01cr@693
|
282 qstate->accumulator->add_point(&r);
|
mas01cr@693
|
283 nsamples++;
|
mas01cr@280
|
284 }
|
mas01cr@280
|
285
|
mas01cr@693
|
286 if(!spec->qid.datum) {
|
mas01cr@693
|
287 delete[] v1;
|
mas01cr@693
|
288 }
|
mas01cr@693
|
289 delete[] v2;
|
mas01cr@280
|
290
|
mas01cr@693
|
291 return 0;
|
mas01cr@280
|
292
|
mas01cr@693
|
293 error:
|
mas01cr@693
|
294 if(!spec->qid.datum) {
|
mas01cr@693
|
295 delete[] v1;
|
mas01cr@693
|
296 }
|
mas01cr@280
|
297 delete[] v2;
|
mas01cr@693
|
298 return 17;
|
mas01cr@280
|
299 }
|