Mercurial > hg > audiodb
comparison query.cpp @ 498:342822c2d49a
Merge api-inversion branch (-r656:771, but I don't expect to return to
that branch) into the trunk.
I expect there to be minor performance regressions (e.g. in the SOAP
server index cacheing, which I have forcibly removed) and minor
unplugged memory leaks (e.g. in audioDB::query(), where I don't free up
the datum). I hope that these leaks and performance regressions can be
plugged in short order. I also expect that some (but maybe not all) of
the issues currently addressed in the memory-leaks branch are superseded
or fixed by this merge.
There remains much work to be done; go forth and do it.
author | mas01cr |
---|---|
date | Sat, 10 Jan 2009 16:47:57 +0000 |
parents | fd890d2b38da |
children | cc2b97d020b1 |
comparison
equal
deleted
inserted
replaced
476:a7193678280b | 498:342822c2d49a |
---|---|
1 #include "audioDB.h" | 1 #include "audioDB.h" |
2 #include "reporter.h" | 2 #include "audioDB-internals.h" |
3 | 3 #include "accumulators.h" |
4 bool audioDB::powers_acceptable(double p1, double p2) { | 4 |
5 if (use_absolute_threshold) { | 5 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) { |
6 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { | 6 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) { |
7 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) { | |
7 return false; | 8 return false; |
8 } | 9 } |
9 } | 10 } |
10 if (use_relative_threshold) { | 11 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) { |
11 if (fabs(p1-p2) > fabs(relative_threshold)) { | 12 if (fabs(p1-p2) > fabs(r->relative_threshold)) { |
12 return false; | 13 return false; |
13 } | 14 } |
14 } | 15 } |
15 return true; | 16 return true; |
16 } | 17 } |
17 | 18 |
18 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { | 19 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) { |
19 // init database tables and dbH first | 20 adb_qstate_internal_t qstate = {0}; |
20 if(query_from_key) | 21 qstate.allowed_keys = new std::set<std::string>; |
21 initTables(dbName); | 22 adb_query_results_t *results; |
22 else | 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) { |
23 initTables(dbName, inFile); | 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) { |
24 | 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]); |
25 // keyKeyPos requires dbH to be initialized | 26 } |
26 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND)) | 27 } else { |
27 error("Query key not found :",key); | 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) { |
28 | 29 qstate.allowed_keys->insert((*adb->keys)[k]); |
29 switch (queryType) { | 30 } |
30 case O2_POINT_QUERY: | 31 } |
31 sequenceLength = 1; | 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) { |
32 normalizedDistance = false; | 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) { |
33 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); | 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]); |
35 } | |
36 } | |
37 | |
38 switch(qspec->params.distance) { | |
39 case ADB_DISTANCE_DOT_PRODUCT: | |
40 switch(qspec->params.accumulation) { | |
41 case ADB_ACCUMULATION_DB: | |
42 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints); | |
43 break; | |
44 case ADB_ACCUMULATION_PER_TRACK: | |
45 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks); | |
46 break; | |
47 case ADB_ACCUMULATION_ONE_TO_ONE: | |
48 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>(); | |
49 break; | |
50 default: | |
51 goto error; | |
52 } | |
34 break; | 53 break; |
35 case O2_TRACK_QUERY: | 54 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
36 sequenceLength = 1; | 55 case ADB_DISTANCE_EUCLIDEAN: |
37 normalizedDistance = false; | 56 switch(qspec->params.accumulation) { |
38 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); | 57 case ADB_ACCUMULATION_DB: |
39 break; | 58 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints); |
40 case O2_SEQUENCE_QUERY: | 59 break; |
41 if(no_unit_norming) | 60 case ADB_ACCUMULATION_PER_TRACK: |
42 normalizedDistance = false; | 61 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks); |
43 if(radius == 0) { | 62 break; |
44 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); | 63 case ADB_ACCUMULATION_ONE_TO_ONE: |
45 } else { | 64 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>(); |
46 if(index_exists(dbName, radius, sequenceLength)){ | 65 break; |
47 char* indexName = index_get_name(dbName, radius, sequenceLength); | 66 default: |
48 lsh = index_allocate(indexName, false); | 67 goto error; |
49 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); | |
50 delete[] indexName; | |
51 } | |
52 else | |
53 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); | |
54 } | |
55 break; | |
56 case O2_N_SEQUENCE_QUERY: | |
57 if(no_unit_norming) | |
58 normalizedDistance = false; | |
59 if(radius == 0) { | |
60 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); | |
61 } else { | |
62 if(index_exists(dbName, radius, sequenceLength)){ | |
63 char* indexName = index_get_name(dbName, radius, sequenceLength); | |
64 lsh = index_allocate(indexName, false); | |
65 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); | |
66 delete[] indexName; | |
67 } | |
68 else | |
69 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); | |
70 } | |
71 break; | |
72 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY : | |
73 if(no_unit_norming) | |
74 normalizedDistance = false; | |
75 if(radius == 0) { | |
76 error("query-type not yet supported"); | |
77 } | |
78 else { | |
79 if(index_exists(dbName, radius, sequenceLength)){ | |
80 char* indexName = index_get_name(dbName, radius, sequenceLength); | |
81 lsh = index_allocate(indexName, false); | |
82 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1); | |
83 delete[] indexName; | |
84 } | |
85 else | |
86 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); | |
87 } | 68 } |
88 break; | 69 break; |
89 default: | 70 default: |
90 error("unrecognized queryType in query()"); | 71 goto error; |
91 } | 72 } |
92 | 73 |
93 // Test for index (again) here | 74 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) { |
94 if(radius && index_exists(dbName, radius, sequenceLength)){ | 75 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) { |
95 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength); | 76 goto error; |
96 index_query_loop(dbName, query_from_key_index); | 77 } |
97 } | 78 } else { |
98 else{ | 79 if(audiodb_query_loop(adb, qspec, &qstate)) { |
99 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName); | 80 goto error; |
100 query_loop(dbName, query_from_key_index); | 81 } |
101 } | 82 } |
102 | 83 |
103 reporter->report(fileTable, adbQueryResponse); | 84 results = qstate.accumulator->get_points(); |
104 } | 85 |
105 | 86 delete qstate.accumulator; |
106 // return ordinal position of key in keyTable | 87 delete qstate.allowed_keys; |
107 // this should really be a STL hash map search | 88 |
108 unsigned audioDB::getKeyPos(char* key){ | 89 return results; |
109 if(!dbH) | 90 |
110 error("dbH not initialized","getKeyPos"); | 91 error: |
111 for(unsigned k=0; k<dbH->numFiles; k++) | 92 if(qstate.accumulator) |
112 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) | 93 delete qstate.accumulator; |
113 return k; | 94 if(qstate.allowed_keys) |
114 error("Key not found",key); | 95 delete qstate.allowed_keys; |
115 return O2_ERR_KEYNOTFOUND; | 96 return NULL; |
116 } | 97 } |
117 | 98 |
118 // This is a common pattern in sequence queries: what we are doing is | 99 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) { |
119 // taking a window of length seqlen over a buffer of length length, | 100 free(rs->results); |
120 // and placing the sum of the elements in that window in the first | 101 free(rs); |
121 // element of the window: thus replacing all but the last seqlen | 102 return 0; |
122 // elements in the buffer with the corresponding windowed sum. | 103 } |
123 void audioDB::sequence_sum(double *buffer, int length, int seqlen) { | 104 |
124 double tmp1, tmp2, *ps; | 105 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { |
125 int j, w; | |
126 | |
127 tmp1 = *buffer; | |
128 j = 1; | |
129 w = seqlen - 1; | |
130 while(w--) { | |
131 *buffer += buffer[j++]; | |
132 } | |
133 ps = buffer + 1; | |
134 w = length - seqlen; // +1 - 1 | |
135 while(w--) { | |
136 tmp2 = *ps; | |
137 if(isfinite(tmp1)) { | |
138 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); | |
139 } else { | |
140 for(int i = 1; i < seqlen; i++) { | |
141 *ps += *(ps + i); | |
142 } | |
143 } | |
144 tmp1 = tmp2; | |
145 ps++; | |
146 } | |
147 } | |
148 | |
149 // In contrast to sequence_sum() above, sequence_sqrt() and | |
150 // sequence_average() below are simple mappers across the sequence. | |
151 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { | |
152 int w = length - seqlen + 1; | |
153 while(w--) { | |
154 *buffer = sqrt(*buffer); | |
155 buffer++; | |
156 } | |
157 } | |
158 | |
159 void audioDB::sequence_average(double *buffer, int length, int seqlen) { | |
160 int w = length - seqlen + 1; | |
161 while(w--) { | |
162 *buffer /= seqlen; | |
163 buffer++; | |
164 } | |
165 } | |
166 | |
167 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { | |
168 unsigned int j, k, l, w; | 106 unsigned int j, k, l, w; |
169 double *dp, *qp, *sp; | 107 double *dp, *qp, *sp; |
170 | 108 |
171 const unsigned HOP_SIZE = sequenceHop; | 109 const unsigned HOP_SIZE = spec->refine.hopsize; |
172 const unsigned wL = sequenceLength; | 110 const unsigned wL = spec->qid.sequence_length; |
173 | 111 |
174 for(j = 0; j < numVectors; j++) { | 112 for(j = 0; j < numVectors; j++) { |
175 // Sum products matrix | 113 // Sum products matrix |
176 D[j] = new double[trackTable[track]]; | 114 D[j] = new double[(*adb->track_lengths)[track]]; |
177 assert(D[j]); | 115 assert(D[j]); |
178 // Matched filter matrix | 116 // Matched filter matrix |
179 DD[j]=new double[trackTable[track]]; | 117 DD[j]=new double[(*adb->track_lengths)[track]]; |
180 assert(DD[j]); | 118 assert(DD[j]); |
181 } | 119 } |
182 | 120 |
183 // Dot product | 121 // Dot product |
184 for(j = 0; j < numVectors; j++) | 122 for(j = 0; j < numVectors; j++) |
185 for(k = 0; k < trackTable[track]; k++){ | 123 for(k = 0; k < (*adb->track_lengths)[track]; k++){ |
186 qp = query + j * dbH->dim; | 124 qp = query + j * adb->header->dim; |
187 sp = data_buffer + k * dbH->dim; | 125 sp = data_buffer + k * adb->header->dim; |
188 DD[j][k] = 0.0; // Initialize matched filter array | 126 DD[j][k] = 0.0; // Initialize matched filter array |
189 dp = &D[j][k]; // point to correlation cell j,k | 127 dp = &D[j][k]; // point to correlation cell j,k |
190 *dp = 0.0; // initialize correlation cell | 128 *dp = 0.0; // initialize correlation cell |
191 l = dbH->dim; // size of vectors | 129 l = adb->header->dim; // size of vectors |
192 while(l--) | 130 while(l--) |
193 *dp += *qp++ * *sp++; | 131 *dp += *qp++ * *sp++; |
194 } | 132 } |
195 | 133 |
196 // Matched Filter | 134 // Matched Filter |
199 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop | 137 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop |
200 for(w = 0; w < wL; w++) { | 138 for(w = 0; w < wL; w++) { |
201 for(j = 0; j < numVectors - w; j++) { | 139 for(j = 0; j < numVectors - w; j++) { |
202 sp = DD[j]; | 140 sp = DD[j]; |
203 spd = D[j+w] + w; | 141 spd = D[j+w] + w; |
204 k = trackTable[track] - w; | 142 k = (*adb->track_lengths)[track] - w; |
205 while(k--) | 143 while(k--) |
206 *sp++ += *spd++; | 144 *sp++ += *spd++; |
207 } | 145 } |
208 } | 146 } |
209 } else { // HOP_SIZE != 1 | 147 } else { // HOP_SIZE != 1 |
210 for(w = 0; w < wL; w++) { | 148 for(w = 0; w < wL; w++) { |
211 for(j = 0; j < numVectors - w; j += HOP_SIZE) { | 149 for(j = 0; j < numVectors - w; j += HOP_SIZE) { |
212 sp = DD[j]; | 150 sp = DD[j]; |
213 spd = D[j+w]+w; | 151 spd = D[j+w]+w; |
214 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) { | 152 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) { |
215 *sp += *spd; | 153 *sp += *spd; |
216 sp += HOP_SIZE; | 154 sp += HOP_SIZE; |
217 spd += HOP_SIZE; | 155 spd += HOP_SIZE; |
218 } | 156 } |
219 } | 157 } |
220 } | 158 } |
221 } | 159 } |
222 } | 160 } |
223 | 161 |
224 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) { | 162 static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) { |
225 if(D != NULL) { | 163 if(D != NULL) { |
226 for(unsigned int j = 0; j < numVectors; j++) { | 164 for(unsigned int j = 0; j < numVectors; j++) { |
227 delete[] D[j]; | 165 delete[] D[j]; |
228 } | 166 } |
229 } | 167 } |
232 delete[] DD[j]; | 170 delete[] DD[j]; |
233 } | 171 } |
234 } | 172 } |
235 } | 173 } |
236 | 174 |
237 void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) { | 175 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) { |
238 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) { | 176 uint32_t track_length = (*adb->track_lengths)[track]; |
177 size_t track_size = track_length * sizeof(double) * adb->header->dim; | |
178 if (track_size > *data_buffer_size_p) { | |
239 if(*data_buffer_p) { | 179 if(*data_buffer_p) { |
240 free(*data_buffer_p); | 180 free(*data_buffer_p); |
241 } | 181 } |
242 { | 182 { |
243 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim; | 183 *data_buffer_size_p = track_size; |
244 void *tmp = malloc(*data_buffer_size_p); | 184 void *tmp = malloc(track_size); |
245 if (tmp == NULL) { | 185 if (tmp == NULL) { |
246 error("error allocating data buffer"); | 186 goto error; |
247 } | 187 } |
248 *data_buffer_p = (double *) tmp; | 188 *data_buffer_p = (double *) tmp; |
249 } | 189 } |
250 } | 190 } |
251 | 191 |
252 CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim); | 192 read_or_goto_error(trkfid, *data_buffer_p, track_size); |
253 } | 193 return 0; |
254 | 194 |
255 // These names deserve some unpicking. The names starting with a "q" | 195 error: |
256 // are pointers to the query, norm and power vectors; the names | 196 return 1; |
257 // starting with "v" are things that will end up pointing to the | 197 } |
258 // actual query point's information. -- CSR, 2007-12-05 | 198 |
259 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) { | 199 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) { |
260 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); | 200 off_t track_offset = (*adb->track_offsets)[track_id]; |
261 | 201 if(adb->header->flags & O2_FLAG_LARGE_ADB) { |
262 if(!(dbH->flags & O2_FLAG_L2NORM)) { | 202 /* create a reference/insert, then use adb_insert_create_datum() */ |
263 error("Database must be L2 normed for sequence query","use -L2NORM"); | 203 adb_reference_t reference = {0}; |
264 } | 204 char features[MAXSTR], power[MAXSTR], times[MAXSTR]; |
265 | 205 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); |
266 if(*nvp < sequenceLength) { | 206 read_or_goto_error(adb->fd, features, MAXSTR); |
267 error("Query shorter than requested sequence length", "maybe use -l"); | 207 reference.features = features; |
268 } | 208 if(adb->header->flags & O2_FLAG_POWER) { |
209 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); | |
210 read_or_goto_error(adb->fd, power, MAXSTR); | |
211 reference.power = power; | |
212 } | |
213 if(adb->header->flags & O2_FLAG_TIMES) { | |
214 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); | |
215 read_or_goto_error(adb->fd, times, MAXSTR); | |
216 reference.times = times; | |
217 } | |
218 return audiodb_insert_create_datum(&reference, d); | |
219 } else { | |
220 /* initialize from sources of data that we already have */ | |
221 d->nvectors = (*adb->track_lengths)[track_id]; | |
222 d->dim = adb->header->dim; | |
223 d->key = (*adb->keys)[track_id].c_str(); | |
224 /* read out stuff from the database tables */ | |
225 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double)); | |
226 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET); | |
227 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double)); | |
228 if(adb->header->flags & O2_FLAG_POWER) { | |
229 d->power = (double *) malloc(d->nvectors * sizeof(double)); | |
230 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET); | |
231 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double)); | |
232 } | |
233 if(adb->header->flags & O2_FLAG_TIMES) { | |
234 d->times = (double *) malloc(2 * d->nvectors * sizeof(double)); | |
235 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET); | |
236 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double)); | |
237 } | |
238 return 0; | |
239 } | |
240 error: | |
241 audiodb_free_datum(d); | |
242 return 1; | |
243 } | |
244 | |
245 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { | |
246 uint32_t nvectors = d->nvectors; | |
247 | |
248 qpointers->nvectors = nvectors; | |
249 | |
250 size_t vector_size = nvectors * sizeof(double) * d->dim; | |
251 *vector_data = new double[vector_size]; | |
252 memcpy(*vector_data, d->data, vector_size); | |
253 | |
254 qpointers->l2norm_data = new double[vector_size / d->dim]; | |
255 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data); | |
256 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length); | |
257 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length); | |
258 | |
259 if(d->power) { | |
260 qpointers->power_data = new double[vector_size / d->dim]; | |
261 memcpy(qpointers->power_data, d->power, vector_size / d->dim); | |
262 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length); | |
263 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length); | |
264 } | |
265 | |
266 if(d->times) { | |
267 qpointers->mean_duration = new double[1]; | |
268 *qpointers->mean_duration = 0; | |
269 for(unsigned int k = 0; k < nvectors; k++) { | |
270 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k]; | |
271 } | |
272 *qpointers->mean_duration /= nvectors; | |
273 } | |
274 | |
275 *vector = *vector_data; | |
276 qpointers->l2norm = qpointers->l2norm_data; | |
277 qpointers->power = qpointers->power_data; | |
278 return 0; | |
279 } | |
280 | |
281 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) { | |
282 adb_datum_t *datum; | |
283 adb_datum_t d = {0}; | |
284 uint32_t sequence_length; | |
285 uint32_t sequence_start; | |
286 | |
287 datum = spec->qid.datum; | |
288 sequence_length = spec->qid.sequence_length; | |
289 sequence_start = spec->qid.sequence_start; | |
290 | |
291 if(datum->data) { | |
292 if(datum->dim != adb->header->dim) { | |
293 return 1; | |
294 } | |
295 /* initialize d, and mark that nothing needs freeing later. */ | |
296 d = *datum; | |
297 datum = &d; | |
298 } else if (datum->key) { | |
299 uint32_t track_id; | |
300 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) { | |
301 return 1; | |
302 } | |
303 audiodb_track_id_datum(adb, track_id, &d); | |
304 } else { | |
305 return 1; | |
306 } | |
307 | |
308 /* FIXME: check the overflow logic here */ | |
309 if(sequence_start + sequence_length > d.nvectors) { | |
310 if(datum != &d) { | |
311 audiodb_free_datum(&d); | |
312 } | |
313 return 1; | |
314 } | |
315 | |
316 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers); | |
317 | |
318 /* Finally, if applicable, set up the moving qpointers. */ | |
319 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) { | |
320 /* the qpointers are already at the start, and so correct. */ | |
321 } else { | |
322 /* adjust the qpointers to point to the correct place in the sequence */ | |
323 *vector = *vector_data + spec->qid.sequence_start * d.dim; | |
324 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start; | |
325 if(d.power) { | |
326 qpointers->power = qpointers->power_data + spec->qid.sequence_start; | |
327 } | |
328 qpointers->nvectors = sequence_length; | |
329 } | |
330 | |
331 /* Clean up: free any bits of datum that we have ourselves | |
332 * allocated. */ | |
333 if(datum != &d) { | |
334 audiodb_free_datum(&d); | |
335 } | |
336 | |
337 return 0; | |
338 } | |
339 | |
340 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) { | |
341 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double)); | |
342 uint32_t sequence_length = spec->qid.sequence_length; | |
343 | |
344 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); | |
345 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO; | |
346 double *times_table = NULL; | |
347 | |
348 | |
349 dbpointers->nvectors = nvectors; | |
350 dbpointers->l2norm_data = new double[nvectors]; | |
351 | |
352 double *snpp = dbpointers->l2norm_data, *sppp = 0; | |
353 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET); | |
354 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double)); | |
355 | |
356 if (using_power) { | |
357 if (!(adb->header->flags & O2_FLAG_POWER)) { | |
358 goto error; | |
359 } | |
360 dbpointers->power_data = new double[nvectors]; | |
361 sppp = dbpointers->power_data; | |
362 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET); | |
363 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double)); | |
364 } | |
365 | |
366 for(unsigned int i = 0; i < adb->header->numFiles; i++){ | |
367 size_t track_length = (*adb->track_lengths)[i]; | |
368 if(track_length >= sequence_length) { | |
369 audiodb_sequence_sum(snpp, track_length, sequence_length); | |
370 audiodb_sequence_sqrt(snpp, track_length, sequence_length); | |
371 if (using_power) { | |
372 audiodb_sequence_sum(sppp, track_length, sequence_length); | |
373 audiodb_sequence_average(sppp, track_length, sequence_length); | |
374 } | |
375 } | |
376 snpp += track_length; | |
377 if (using_power) { | |
378 sppp += track_length; | |
379 } | |
380 } | |
381 | |
382 if (using_times) { | |
383 if(!(adb->header->flags & O2_FLAG_TIMES)) { | |
384 goto error; | |
385 } | |
386 | |
387 dbpointers->mean_duration = new double[adb->header->numFiles]; | |
388 | |
389 times_table = (double *) malloc(2 * nvectors * sizeof(double)); | |
390 if(!times_table) { | |
391 goto error; | |
392 } | |
393 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET); | |
394 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double)); | |
395 for(unsigned int k = 0; k < adb->header->numFiles; k++) { | |
396 size_t track_length = (*adb->track_lengths)[k]; | |
397 unsigned int j; | |
398 dbpointers->mean_duration[k] = 0.0; | |
399 for(j = 0; j < track_length; j++) { | |
400 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j]; | |
401 } | |
402 dbpointers->mean_duration[k] /= j; | |
403 } | |
404 | |
405 free(times_table); | |
406 times_table = NULL; | |
407 } | |
408 | |
409 dbpointers->l2norm = dbpointers->l2norm_data; | |
410 dbpointers->power = dbpointers->power_data; | |
411 return 0; | |
412 | |
413 error: | |
414 if(dbpointers->l2norm_data) { | |
415 delete [] dbpointers->l2norm_data; | |
416 } | |
417 if(dbpointers->power_data) { | |
418 delete [] dbpointers->power_data; | |
419 } | |
420 if(dbpointers->mean_duration) { | |
421 delete [] dbpointers->mean_duration; | |
422 } | |
423 if(times_table) { | |
424 free(times_table); | |
425 } | |
426 return 1; | |
269 | 427 |
270 VERB_LOG(1, "performing norms... "); | 428 } |
271 | 429 |
272 *qp = new double[*nvp * dbH->dim]; | 430 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) { |
273 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double)); | 431 adb_qpointers_internal_t dbpointers = {0}; |
274 *qnp = new double[*nvp]; | 432 |
275 unitNorm(*qp, dbH->dim, *nvp, *qnp); | 433 uint32_t sequence_length = spec->qid.sequence_length; |
276 | 434 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
277 sequence_sum(*qnp, *nvp, sequenceLength); | 435 |
278 sequence_sqrt(*qnp, *nvp, sequenceLength); | 436 if(qstate->exact_evaluation_queue->size() == 0) { |
279 | 437 return 0; |
280 if (usingPower) { | 438 } |
281 *qpp = new double[*nvp]; | 439 |
282 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | 440 /* We are guaranteed that the order of points is sorted by: |
283 error("error seeking to data", powerFileName, "lseek"); | 441 * {trackID, spos, qpos} so we can be relatively efficient in |
284 } | 442 * initialization of track data. We assume that points usually |
285 int count = read(powerfd, *qpp, *nvp * sizeof(double)); | 443 * don't overlap, so we will use exhaustive dot product evaluation |
286 if (count == -1) { | 444 * (instead of memoization of partial sums, as in query_loop()). |
287 error("error reading data", powerFileName, "read"); | 445 */ |
288 } | |
289 if ((unsigned) count != *nvp * sizeof(double)) { | |
290 error("short read", powerFileName); | |
291 } | |
292 | |
293 sequence_sum(*qpp, *nvp, sequenceLength); | |
294 sequence_average(*qpp, *nvp, sequenceLength); | |
295 } | |
296 | |
297 if (usingTimes) { | |
298 unsigned int k; | |
299 *mqdp = 0.0; | |
300 double *querydurs = new double[*nvp]; | |
301 double *timesdata = new double[*nvp*2]; | |
302 insertTimeStamps(*nvp, timesFile, timesdata); | |
303 for(k = 0; k < *nvp; k++) { | |
304 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
305 *mqdp += querydurs[k]; | |
306 } | |
307 *mqdp /= k; | |
308 | |
309 VERB_LOG(1, "mean query file duration: %f\n", *mqdp); | |
310 | |
311 delete [] querydurs; | |
312 delete [] timesdata; | |
313 } | |
314 | |
315 // Defaults, for exhaustive search (!usingQueryPoint) | |
316 *vqp = *qp; | |
317 *vqnp = *qnp; | |
318 *vqpp = *qpp; | |
319 | |
320 if(usingQueryPoint) { | |
321 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) { | |
322 error("queryPoint >= numVectors-sequenceLength+1 in query"); | |
323 } else { | |
324 VERB_LOG(1, "query point: %u\n", queryPoint); | |
325 *vqp = *qp + queryPoint * dbH->dim; | |
326 *vqnp = *qnp + queryPoint; | |
327 if (usingPower) { | |
328 *vqpp = *qpp + queryPoint; | |
329 } | |
330 *nvp = sequenceLength; | |
331 } | |
332 } | |
333 } | |
334 | |
335 // Does the same as set_up_query(...) but from database features instead of from a file | |
336 // Constructs the same outputs as set_up_query | |
337 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) { | |
338 if(!trackTable) | |
339 error("trackTable not initialized","set_up_query_from_key"); | |
340 | |
341 if(!(dbH->flags & O2_FLAG_L2NORM)) { | |
342 error("Database must be L2 normed for sequence query","use -L2NORM"); | |
343 } | |
344 | |
345 if(dbH->flags & O2_FLAG_POWER) | |
346 usingPower = true; | |
347 | |
348 if(dbH->flags & O2_FLAG_TIMES) | |
349 usingTimes = true; | |
350 | |
351 *nvp = trackTable[queryIndex]; | |
352 if(*nvp < sequenceLength) { | |
353 error("Query shorter than requested sequence length", "maybe use -l"); | |
354 } | |
355 | |
356 VERB_LOG(1, "performing norms... "); | |
357 | |
358 // For LARGE_ADB load query features from file | |
359 if( dbH->flags & O2_FLAG_LARGE_ADB ){ | |
360 if(infid>0) | |
361 close(infid); | |
362 char* prefixedString = new char[O2_MAXFILESTR]; | |
363 char* tmpStr = prefixedString; | |
364 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); | |
365 prefix_name(&prefixedString, adb_feature_root); | |
366 if(tmpStr!=prefixedString) | |
367 delete[] tmpStr; | |
368 initInputFile(prefixedString, false); // nommap, file pointer at correct position | |
369 size_t allocatedSize = 0; | |
370 read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize | |
371 // Consistency check on allocated memory and query feature size | |
372 if(*nvp*sizeof(double)*dbH->dim != allocatedSize) | |
373 error("Query memory allocation failed consitency check","set_up_query_from_key"); | |
374 // Allocated and calculate auxillary sequences: l2norm and power | |
375 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp); | |
376 } | |
377 else{ // Load from self-contained ADB database | |
378 // Read query feature vectors from database | |
379 *qp = NULL; | |
380 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); | |
381 size_t allocatedSize = 0; | |
382 read_data(dbfid, queryIndex, qp, &allocatedSize); | |
383 // Consistency check on allocated memory and query feature size | |
384 if(*nvp*sizeof(double)*dbH->dim != allocatedSize) | |
385 error("Query memory allocation failed consitency check","set_up_query_from_key"); | |
386 | |
387 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors | |
388 // Copy L2 norm partial-sum coefficients | |
389 assert(*qnp = new double[*nvp]); | |
390 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double)); | |
391 sequence_sum(*qnp, *nvp, sequenceLength); | |
392 sequence_sqrt(*qnp, *nvp, sequenceLength); | |
393 | |
394 if( usingPower ){ | |
395 // Copy Power partial-sum coefficients | |
396 assert(*qpp = new double[*nvp]); | |
397 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double)); | |
398 sequence_sum(*qpp, *nvp, sequenceLength); | |
399 sequence_average(*qpp, *nvp, sequenceLength); | |
400 } | |
401 | |
402 if (usingTimes) { | |
403 unsigned int k; | |
404 *mqdp = 0.0; | |
405 double *querydurs = new double[*nvp]; | |
406 double *timesdata = new double[*nvp*2]; | |
407 assert(querydurs && timesdata); | |
408 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double)); | |
409 for(k = 0; k < *nvp; k++) { | |
410 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
411 *mqdp += querydurs[k]; | |
412 } | |
413 *mqdp /= k; | |
414 | |
415 VERB_LOG(1, "mean query file duration: %f\n", *mqdp); | |
416 | |
417 delete [] querydurs; | |
418 delete [] timesdata; | |
419 } | |
420 } | |
421 | |
422 // Defaults, for exhaustive search (!usingQueryPoint) | |
423 *vqp = *qp; | |
424 *vqnp = *qnp; | |
425 *vqpp = *qpp; | |
426 | |
427 if(usingQueryPoint) { | |
428 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) { | |
429 error("queryPoint >= numVectors-sequenceLength+1 in query"); | |
430 } else { | |
431 VERB_LOG(1, "query point: %u\n", queryPoint); | |
432 *vqp = *qp + queryPoint * dbH->dim; | |
433 *vqnp = *qnp + queryPoint; | |
434 if (usingPower) { | |
435 *vqpp = *qpp + queryPoint; | |
436 } | |
437 *nvp = sequenceLength; | |
438 } | |
439 } | |
440 } | |
441 | |
442 | |
443 // FIXME: this is not the right name; we're not actually setting up | |
444 // the database, but copying various bits of it out of mmap()ed tables | |
445 // in order to reduce seeks. | |
446 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) { | |
447 *dvp = dbH->length / (dbH->dim * sizeof(double)); | |
448 *snp = new double[*dvp]; | |
449 | |
450 double *snpp = *snp, *sppp = 0; | |
451 memcpy(*snp, l2normTable, *dvp * sizeof(double)); | |
452 | |
453 if (usingPower) { | |
454 if (!(dbH->flags & O2_FLAG_POWER)) { | |
455 error("database not power-enabled", dbName); | |
456 } | |
457 *spp = new double[*dvp]; | |
458 sppp = *spp; | |
459 memcpy(*spp, powerTable, *dvp * sizeof(double)); | |
460 } | |
461 | |
462 for(unsigned int i = 0; i < dbH->numFiles; i++){ | |
463 if(trackTable[i] >= sequenceLength) { | |
464 sequence_sum(snpp, trackTable[i], sequenceLength); | |
465 sequence_sqrt(snpp, trackTable[i], sequenceLength); | |
466 | |
467 if (usingPower) { | |
468 sequence_sum(sppp, trackTable[i], sequenceLength); | |
469 sequence_average(sppp, trackTable[i], sequenceLength); | |
470 } | |
471 } | |
472 snpp += trackTable[i]; | |
473 if (usingPower) { | |
474 sppp += trackTable[i]; | |
475 } | |
476 } | |
477 | |
478 if (usingTimes) { | |
479 if(!(dbH->flags & O2_FLAG_TIMES)) { | |
480 error("query timestamps provided for non-timed database", dbName); | |
481 } | |
482 | |
483 *mddp = new double[dbH->numFiles]; | |
484 | |
485 for(unsigned int k = 0; k < dbH->numFiles; k++) { | |
486 unsigned int j; | |
487 (*mddp)[k] = 0.0; | |
488 for(j = 0; j < trackTable[k]; j++) { | |
489 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j]; | |
490 } | |
491 (*mddp)[k] /= j; | |
492 } | |
493 } | |
494 | |
495 *vsnp = *snp; | |
496 *vspp = *spp; | |
497 } | |
498 | |
499 // query_points() | |
500 // | |
501 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair | |
502 // and insert result into the current reporter. | |
503 // | |
504 // Preconditions: | |
505 // A query inFile has been opened with setup_query(...) and query pointers initialized | |
506 // The database contains some points | |
507 // An exact_evaluation_queue has been allocated and populated | |
508 // A reporter has been allocated | |
509 // | |
510 // Postconditions: | |
511 // reporter contains the points and distances that meet the reporter constraints | |
512 | |
513 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){ | |
514 unsigned int dbVectors; | |
515 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0; | |
516 double *meanDBdur = 0; | |
517 | |
518 // check pre-conditions | |
519 assert(exact_evaluation_queue&&reporter); | |
520 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate | |
521 return; | |
522 | |
523 // Compute database info | |
524 // FIXME: we more than likely don't need very much of the database | |
525 // so make a new method to build these values per-track or, even better, per-point | |
526 if( !( dbH->flags & O2_FLAG_LARGE_ADB) ) | |
527 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); | |
528 | |
529 VERB_LOG(1, "matching points..."); | |
530 | |
531 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
532 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
533 | |
534 // We are guaranteed that the order of points is sorted by: | |
535 // trackID, spos, qpos | |
536 // so we can be relatively efficient in initialization of track data. | |
537 // Here we assume that points don't overlap, so we will use exhaustive dot | |
538 // product evaluation instead of memoization of partial sums which is used | |
539 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop() | |
540 double dist; | 446 double dist; |
541 size_t data_buffer_size = 0; | 447 double *dbdata = 0, *dbdata_pointer; |
542 double *data_buffer = 0; | 448 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range |
543 Uns32T trackOffset = 0; | 449 Uns32T npairs = qstate->exact_evaluation_queue->size(); |
544 Uns32T trackIndexOffset = 0; | 450 while(npairs--) { |
545 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range | 451 PointPair pp = qstate->exact_evaluation_queue->top(); |
546 Uns32T npairs = exact_evaluation_queue->size(); | 452 if(currentTrack != pp.trackID) { |
547 while(npairs--){ | 453 SAFE_DELETE_ARRAY(dbdata); |
548 PointPair pp = exact_evaluation_queue->top(); | 454 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
549 // Large ADB track data must be loaded here for sPower | 455 SAFE_DELETE_ARRAY(dbpointers.power_data); |
550 if(dbH->flags & O2_FLAG_LARGE_ADB){ | 456 SAFE_DELETE_ARRAY(dbpointers.mean_duration); |
551 trackOffset=0; | 457 currentTrack = pp.trackID; |
552 trackIndexOffset=0; | 458 adb_datum_t d = {0}; |
553 if(currentTrack!=pp.trackID){ | 459 if(audiodb_track_id_datum(adb, pp.trackID, &d)) { |
554 char* prefixedString = new char[O2_MAXFILESTR]; | 460 delete qstate->exact_evaluation_queue; |
555 char* tmpStr = prefixedString; | 461 return 1; |
556 // On currentTrack change, allocate and load track data | 462 } |
557 currentTrack=pp.trackID; | 463 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) { |
558 SAFE_DELETE_ARRAY(sNorm); | 464 delete qstate->exact_evaluation_queue; |
559 SAFE_DELETE_ARRAY(sPower); | 465 audiodb_free_datum(&d); |
560 if(infid>0) | 466 return 1; |
561 close(infid); | 467 } |
562 // Open and check dimensions of feature file | 468 audiodb_free_datum(&d); |
563 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR); | 469 } |
564 prefix_name((char ** const) &prefixedString, adb_feature_root); | 470 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0; |
565 if (prefixedString!=tmpStr) | 471 Uns32T sPos = pp.spos; // index into l2norm table |
566 delete[] tmpStr; | |
567 initInputFile(prefixedString, false); // nommap, file pointer at correct position | |
568 // Load the feature vector data for current track into data_buffer | |
569 read_data(infid, pp.trackID, &data_buffer, &data_buffer_size); | |
570 // Load power and calculate power and l2norm sequence sums | |
571 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr); | |
572 } | |
573 } | |
574 else{ | |
575 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables | |
576 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset | |
577 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset | |
578 } | |
579 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point | |
580 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table | |
581 // Test power thresholds before computing distance | 472 // Test power thresholds before computing distance |
582 if( ( !usingPower || powers_acceptable(qpPtr[qPos], sPower[sPos])) && | 473 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) && |
583 ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){ | 474 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){ |
584 // Non-large ADB track data is loaded inside power test for efficiency | |
585 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){ | |
586 // On currentTrack change, allocate and load track data | |
587 currentTrack=pp.trackID; | |
588 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
589 read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size); | |
590 } | |
591 // Compute distance | 475 // Compute distance |
592 dist = dot_product_points(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength); | 476 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length); |
593 double qn = qnPtr[qPos]; | 477 double qn = qpointers->l2norm[qPos]; |
594 double sn = sNorm[sPos]; | 478 double sn = dbpointers.l2norm[sPos]; |
595 if(normalizedDistance) | 479 switch(spec->params.distance) { |
480 case ADB_DISTANCE_EUCLIDEAN_NORMED: | |
596 dist = 2 - (2/(qn*sn))*dist; | 481 dist = 2 - (2/(qn*sn))*dist; |
597 else | 482 break; |
598 if(no_unit_norming) | 483 case ADB_DISTANCE_EUCLIDEAN: |
599 dist = qn*qn + sn*sn - 2*dist; | 484 dist = qn*qn + sn*sn - 2*dist; |
600 // else | 485 break; |
601 // dist = dist; | 486 } |
602 if((!radius) || dist <= (radius+O2_DISTANCE_TOLERANCE)) | 487 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || |
603 reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist); | 488 dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { |
604 } | 489 adb_result_t r; |
605 exact_evaluation_queue->pop(); | 490 r.key = (*adb->keys)[pp.trackID].c_str(); |
491 r.dist = dist; | |
492 r.qpos = pp.qpos; | |
493 r.ipos = pp.spos; | |
494 qstate->accumulator->add_point(&r); | |
495 } | |
496 } | |
497 qstate->exact_evaluation_queue->pop(); | |
606 } | 498 } |
607 | 499 |
608 | 500 |
609 // Cleanup | 501 // Cleanup |
610 free(data_buffer); | 502 SAFE_DELETE_ARRAY(dbdata); |
611 SAFE_DELETE_ARRAY(sNorm); | 503 SAFE_DELETE_ARRAY(dbpointers.l2norm_data); |
612 SAFE_DELETE_ARRAY(sPower); | 504 SAFE_DELETE_ARRAY(dbpointers.power_data); |
613 SAFE_DELETE_ARRAY(meanDBdur); | 505 SAFE_DELETE_ARRAY(dbpointers.mean_duration); |
614 } | 506 delete qstate->exact_evaluation_queue; |
615 | 507 return 0; |
616 // A completely unprotected dot-product method | 508 } |
617 // Caller is responsible for ensuring that memory is within bounds | 509 |
618 inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){ | 510 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) { |
619 double dist = 0.0; | |
620 while(L--) | |
621 dist += *q++ * *p++; | |
622 return dist; | |
623 } | |
624 | |
625 void audioDB::query_loop(const char* dbName, Uns32T queryIndex) { | |
626 | 511 |
627 unsigned int numVectors; | |
628 double *query, *query_data; | 512 double *query, *query_data; |
629 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; | 513 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0}; |
630 double meanQdur; | 514 |
631 | 515 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD); |
632 if( dbH->flags & O2_FLAG_LARGE_ADB ) | 516 |
633 error("error: LARGE_ADB requires indexed query"); | 517 if(adb->header->flags & O2_FLAG_LARGE_ADB) { |
634 | 518 /* FIXME: actually it would be nice to support this mode of |
635 if(query_from_key) | 519 * operation, but for now... */ |
636 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); | 520 return 1; |
637 else | 521 } |
638 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); | 522 |
639 | 523 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) { |
640 unsigned int dbVectors; | 524 return 1; |
641 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; | 525 } |
642 double *meanDBdur = 0; | 526 |
643 | 527 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) { |
644 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); | 528 return 1; |
645 | 529 } |
646 VERB_LOG(1, "matching tracks..."); | 530 |
647 | 531 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize; |
648 assert(pointNN>0 && pointNN<=O2_MAXNN); | 532 unsigned wL = spec->qid.sequence_length; |
649 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
650 | |
651 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
652 double **D = 0; // Differences query and target | 533 double **D = 0; // Differences query and target |
653 double **DD = 0; // Matched filter distance | 534 double **DD = 0; // Matched filter distance |
654 | 535 |
655 D = new double*[numVectors]; // pre-allocate | 536 D = new double*[qpointers.nvectors]; // pre-allocate |
656 DD = new double*[numVectors]; | 537 DD = new double*[qpointers.nvectors]; |
657 | 538 |
658 gettimeofday(&tv1, NULL); | |
659 unsigned processedTracks = 0; | |
660 off_t trackIndexOffset; | 539 off_t trackIndexOffset; |
661 char nextKey[MAXSTR]; | |
662 | 540 |
663 // Track loop | 541 // Track loop |
664 size_t data_buffer_size = 0; | 542 size_t data_buffer_size = 0; |
665 double *data_buffer = 0; | 543 double *data_buffer = 0; |
666 lseek(dbfid, dbH->dataOffset, SEEK_SET); | 544 lseek(adb->fd, adb->header->dataOffset, SEEK_SET); |
667 | 545 |
668 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { | 546 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end(); |
669 | 547 for(track = 0; track < adb->header->numFiles; track++) { |
670 trackOffset = trackOffsetTable[track]; // numDoubles offset | 548 unsigned t = track; |
671 | 549 |
672 // get trackID from file if using a control file | 550 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) { |
673 if(trackFile) { | 551 track++; |
674 trackFile->getline(nextKey,MAXSTR); | 552 if(track == adb->header->numFiles) { |
675 if(!trackFile->eof()) { | 553 goto loop_finish; |
676 track = getKeyPos(nextKey); | 554 } |
677 trackOffset = trackOffsetTable[track]; | 555 } |
678 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | 556 trackOffset = (*adb->track_offsets)[track]; |
679 } else { | 557 if(track != t) { |
680 break; | 558 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET); |
681 } | 559 } |
682 } | 560 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset |
683 | 561 |
684 // skip identity on query_from_key | 562 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) { |
685 if( query_from_key && (track == queryIndex) ) { | 563 return 1; |
686 if(queryIndex!=dbH->numFiles-1){ | 564 } |
687 track++; | 565 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences |
688 trackOffset = trackOffsetTable[track]; | |
689 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
690 } | |
691 else{ | |
692 break; | |
693 } | |
694 } | |
695 | |
696 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
697 | |
698 read_data(dbfid, track, &data_buffer, &data_buffer_size); | |
699 if(sequenceLength <= trackTable[track]) { // test for short sequences | |
700 | 566 |
701 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]); | 567 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD); |
702 | 568 |
703 initialize_arrays(track, numVectors, query, data_buffer, D, DD); | 569 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) || |
704 | 570 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) { |
705 if(usingTimes) { | |
706 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]); | |
707 } | |
708 | |
709 if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) { | |
710 if(usingTimes) { | |
711 VERB_LOG(3,"within duration tolerance.\n"); | |
712 } | |
713 | 571 |
714 // Search for minimum distance by shingles (concatenated vectors) | 572 // Search for minimum distance by shingles (concatenated vectors) |
715 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { | 573 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) { |
716 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { | 574 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) { |
717 double thisDist; | 575 double thisDist = 0; |
718 if(normalizedDistance) | 576 double qn = qpointers.l2norm[j]; |
719 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 577 double sn = dbpointers.l2norm[trackIndexOffset + k]; |
720 else | 578 switch(spec->params.distance) { |
721 if(no_unit_norming) | 579 case ADB_DISTANCE_EUCLIDEAN_NORMED: |
722 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; | 580 thisDist = 2-(2/(qn*sn))*DD[j][k]; |
723 else | 581 break; |
724 thisDist = DD[j][k]; | 582 case ADB_DISTANCE_EUCLIDEAN: |
725 | 583 thisDist = qn*qn + sn*sn - 2*DD[j][k]; |
584 break; | |
585 case ADB_DISTANCE_DOT_PRODUCT: | |
586 thisDist = DD[j][k]; | |
587 break; | |
588 } | |
726 // Power test | 589 // Power test |
727 if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) { | 590 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) { |
728 // radius test | 591 // radius test |
729 if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) { | 592 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) || |
730 reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist); | 593 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) { |
594 adb_result_t r; | |
595 r.key = (*adb->keys)[track].c_str(); | |
596 r.dist = thisDist; | |
597 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) { | |
598 r.qpos = j; | |
599 } else { | |
600 r.qpos = spec->qid.sequence_start; | |
601 } | |
602 r.ipos = k; | |
603 qstate->accumulator->add_point(&r); | |
731 } | 604 } |
732 } | 605 } |
733 } | 606 } |
734 } | 607 } |
735 } // Duration match | 608 } // Duration match |
736 delete_arrays(track, numVectors, D, DD); | 609 audiodb_delete_arrays(track, qpointers.nvectors, D, DD); |
737 } | 610 } |
738 } | 611 } |
612 | |
613 loop_finish: | |
739 | 614 |
740 free(data_buffer); | 615 free(data_buffer); |
741 | |
742 gettimeofday(&tv2,NULL); | |
743 VERB_LOG(1,"elapsed time: %ld msec\n", | |
744 (tv2.tv_sec*1000 + tv2.tv_usec/1000) - | |
745 (tv1.tv_sec*1000 + tv1.tv_usec/1000)) | |
746 | 616 |
747 // Clean up | 617 // Clean up |
748 if(query_data) | 618 if(query_data) |
749 delete[] query_data; | 619 delete[] query_data; |
750 if(qNorm) | 620 if(qpointers.l2norm_data) |
751 delete[] qNorm; | 621 delete[] qpointers.l2norm_data; |
752 if(sNorm) | 622 if(qpointers.power_data) |
753 delete[] sNorm; | 623 delete[] qpointers.power_data; |
754 if(qPower) | 624 if(qpointers.mean_duration) |
755 delete[] qPower; | 625 delete[] qpointers.mean_duration; |
756 if(sPower) | 626 if(dbpointers.power_data) |
757 delete[] sPower; | 627 delete[] dbpointers.power_data; |
628 if(dbpointers.l2norm_data) | |
629 delete[] dbpointers.l2norm_data; | |
758 if(D) | 630 if(D) |
759 delete[] D; | 631 delete[] D; |
760 if(DD) | 632 if(DD) |
761 delete[] DD; | 633 delete[] DD; |
762 if(meanDBdur) | 634 if(dbpointers.mean_duration) |
763 delete[] meanDBdur; | 635 delete[] dbpointers.mean_duration; |
764 } | 636 |
765 | 637 return 0; |
766 // Unit norm block of features | 638 } |
767 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ | |
768 unsigned d; | |
769 double L2, *p; | |
770 | |
771 VERB_LOG(2, "norming %u vectors...", n); | |
772 while(n--) { | |
773 p = X; | |
774 L2 = 0.0; | |
775 d = dim; | |
776 while(d--) { | |
777 L2 += *p * *p; | |
778 p++; | |
779 } | |
780 if(qNorm) { | |
781 *qNorm++=L2; | |
782 } | |
783 X += dim; | |
784 } | |
785 VERB_LOG(2, "done.\n"); | |
786 } | |
787 | |
788 |