mas01cr@457
|
1 #include "accumulator.h"
|
mas01cr@457
|
2
|
mas01cr@457
|
3 /* this struct is for writing polymorphic routines as puns. When
|
mas01cr@457
|
4 * inserting, we might have a "datum" (with actual numerical data) or
|
mas01cr@457
|
5 * a "reference" (with strings denoting pathnames containing numerical
|
mas01cr@457
|
6 * data), but most of the operations are the same. This struct, used
|
mas01cr@457
|
7 * only internally, allows us to write the main body of the insert
|
mas01cr@457
|
8 * code only once.
|
mas01cr@457
|
9 */
|
mas01cr@408
|
/* Type-erased description of one item being inserted.  The payload
 * members are untyped pointers so that a single insertion routine can
 * be handed more than one kind of input; what each pointer refers to
 * is decided by the code that fills the struct in. */
typedef struct adb_datum_internal {
  uint32_t nvectors;  /* vector count (presumably of the item's feature data) */
  uint32_t dim;       /* presumably the dimensionality of each vector */
  const char *key;    /* key string; not modified through this struct */
  void *data;         /* untyped payload: feature data */
  void *times;        /* untyped payload: times */
  void *power;        /* untyped payload: power */
} adb_datum_internal_t;
|
mas01cr@408
|
18
|
mas01cr@463
|
19 /* this struct is to collect together a bunch of information about a
|
mas01cr@463
|
20 * query (or, in fact, a single database entry, or even a whole
|
mas01cr@463
|
21 * database). The _data pointers are immutable (hey, FIXME: should
|
mas01cr@463
|
22 * they be constified in some way?) so that free() can work on them
|
mas01cr@463
|
23 * later, while the ones without the suffix are mutable to maintain
|
mas01cr@463
|
24 * the "current" position in some way. mean_duration points to a
|
mas01cr@463
|
25 * (possibly single-element) array of mean durations for each track.
|
mas01cr@463
|
26 */
|
mas01cr@463
|
/* Bundle of pointers into per-vector norm and power tables.  Members
 * come in base/cursor pairs: the *_data member is the base pointer and
 * the member without the suffix is a cursor into the same table. */
typedef struct adb_qpointers_internal {
  uint32_t nvectors;      /* number of vectors the tables describe */

  double *l2norm_data;    /* base of the norm table */
  double *l2norm;         /* cursor into the norm table */

  double *power_data;     /* base of the power table */
  double *power;          /* cursor into the power table */

  double *mean_duration;  /* array of mean durations (may hold one element) */
} adb_qpointers_internal_t;
|
mas01cr@463
|
35
|
mas01cr@457
|
36 /* this struct is for maintaining per-query state. We don't want to
|
mas01cr@457
|
37 * store this stuff in the adb struct itself, because (a) it doesn't
|
mas01cr@457
|
38 * belong there and (b) in principle people might do two queries in
|
mas01cr@457
|
39 * parallel using the same adb handle. (b) is in practice a little
|
mas01cr@457
|
40 * bit academic because at the moment we're seeking all over the disk
|
mas01cr@457
|
41 * using adb->fd, but changing to use pread() might win us
|
mas01cr@457
|
42 * threadsafety eventually.
|
mas01cr@457
|
43 */
|
mas01cr@468
|
44 typedef struct adb_qstate_internal {
|
mas01cr@468
|
45 Accumulator *accumulator;
|
mas01cr@468
|
46 std::set<std::string> *allowed_keys;
|
mas01cr@468
|
47 std::priority_queue<PointPair> *exact_evaluation_queue;
|
mas01cr@468
|
48 LSH *lsh;
|
mas01cr@468
|
49 } adb_qstate_internal_t;
|
mas01cr@457
|
50
|
mas01cr@468
|
51 /* the transparent version of the opaque (forward-declared) adb_t. */
|
mas01cr@402
|
52 struct adb {
|
mas01cr@402
|
53 char *path;
|
mas01cr@402
|
54 int fd;
|
mas01cr@402
|
55 int flags;
|
mas01cr@402
|
56 adb_header_t *header;
|
mas01cr@453
|
57 std::vector<std::string> *keys;
|
mas01cr@453
|
58 std::map<std::string,uint32_t> *keymap;
|
mas01cr@432
|
59 std::vector<uint32_t> *track_lengths;
|
mas01cr@442
|
60 std::vector<off_t> *track_offsets;
|
mas01cr@465
|
61 LSH *cached_lsh;
|
mas01cr@402
|
62 };
|
mas01cr@402
|
63
|
mas01cr@416
|
64 typedef struct {
|
mas01cr@416
|
65 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
|
mas01cr@416
|
66 return strcmp(r1.key, r2.key) < 0;
|
mas01cr@416
|
67 }
|
mas01cr@416
|
68 } adb_result_key_lt;
|
mas01cr@416
|
69
|
mas01cr@416
|
70 typedef struct {
|
mas01cr@416
|
71 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
|
mas01cr@416
|
72 return r1.qpos < r2.qpos;
|
mas01cr@416
|
73 }
|
mas01cr@416
|
74 } adb_result_qpos_lt;
|
mas01cr@416
|
75
|
mas01cr@416
|
76 typedef struct {
|
mas01cr@416
|
77 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
|
mas01cr@416
|
78 return r1.dist < r2.dist;
|
mas01cr@416
|
79 }
|
mas01cr@416
|
80 } adb_result_dist_lt;
|
mas01cr@416
|
81
|
mas01cr@416
|
82 typedef struct {
|
mas01cr@416
|
83 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
|
mas01cr@416
|
84 return r1.dist > r2.dist;
|
mas01cr@416
|
85 }
|
mas01cr@416
|
86 } adb_result_dist_gt;
|
mas01cr@416
|
87
|
mas01cr@416
|
88 typedef struct {
|
mas01cr@416
|
89 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
|
mas01cr@416
|
90 return ((r1.ipos < r2.ipos) ||
|
mas01cr@416
|
91 ((r1.ipos == r2.ipos) &&
|
mas01cr@416
|
92 ((r1.qpos < r2.qpos) ||
|
mas01cr@416
|
93 ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0)))));
|
mas01cr@416
|
94 }
|
mas01cr@416
|
95 } adb_result_triple_lt;
|
mas01cr@416
|
96
|
mas01cr@401
|
97 /* We could go gcc-specific here and use typeof() instead of passing
|
mas01cr@401
|
98 * in an explicit type. Answers on a postcard as to whether that's a
|
mas01cr@401
|
99 * good plan or not. */
|
mas01cr@401
|
/* Map `length` bytes of adb->fd, read-only and shared, starting at
 * file offset `start`, and store the mapping in `var` cast to `type`.
 * On failure control jumps to the label `error` and `var` is left
 * untouched.
 *
 * The expanding scope must provide a variable named `adb` and a label
 * named `error`.  The expansion is a bare brace block (kept so that
 * existing call sites expand as before), so do not use it as the
 * unbraced body of an if that has an else.
 *
 * The mapping result is compared against MAP_FAILED, the arguments
 * are parenthesised, and the temporary has a name that cannot collide
 * with an argument expression. */
#define mmap_or_goto_error(type, var, start, length) \
  { void *adb_mmap_result_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
    if(adb_mmap_result_ == MAP_FAILED) { \
      goto error; \
    } \
    (var) = (type) adb_mmap_result_; \
  }
|
mas01cr@401
|
107
|
mas01cr@401
|
/* Unmap `length` bytes at `table` unless `table` is a null pointer.
 * The result of munmap() is not examined.
 *
 * `table` is evaluated exactly once and both arguments are
 * parenthesised, so expressions with side effects or low-precedence
 * operators are safe to pass. */
#define maybe_munmap(table, length) \
  { void *adb_munmap_addr_ = (table); \
    if(adb_munmap_addr_) { \
      munmap(adb_munmap_addr_, (length)); \
    } \
  }
|
mas01cr@401
|
113
|
mas01cr@410
|
/* Write `size` bytes from `buffer` to `fd` with a single write() call;
 * jump to the label `error` unless exactly `size` bytes were written
 * (a short write counts as a failure).
 *
 * The expanding scope must provide a label named `error`.  `size` is
 * evaluated once, all arguments are parenthesised, and the temporary
 * has a name that cannot capture an argument such as `tmp`. */
#define write_or_goto_error(fd, buffer, size) \
  { ssize_t adb_write_want_ = (ssize_t) (size); \
    if(write((fd), (buffer), (size_t) adb_write_want_) != adb_write_want_) { \
      goto error; \
    } \
  }
|
mas01cr@410
|
120
|
mas01cr@410
|
/* Read `size` bytes from `fd` into `buffer` with a single read() call;
 * jump to the label `error` unless exactly `size` bytes were read
 * (a short read, including end of file, counts as a failure).
 *
 * The expanding scope must provide a label named `error`.  `size` is
 * evaluated once, all arguments are parenthesised, and the temporary
 * has a name that cannot capture an argument such as `tmp`. */
#define read_or_goto_error(fd, buffer, size) \
  { ssize_t adb_read_want_ = (ssize_t) (size); \
    if(read((fd), (buffer), (size_t) adb_read_want_) != adb_read_want_) { \
      goto error; \
    } \
  }
|
mas01cr@410
|
127
|
mas01cr@401
|
128 static inline int audiodb_sync_header(adb_t *adb) {
|
mas01cr@401
|
129 off_t pos;
|
mas01cr@401
|
130 pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
|
mas01cr@401
|
131 if(pos == (off_t) -1) {
|
mas01cr@401
|
132 goto error;
|
mas01cr@401
|
133 }
|
mas01cr@401
|
134 if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
|
mas01cr@401
|
135 goto error;
|
mas01cr@401
|
136 }
|
mas01cr@401
|
137 if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
|
mas01cr@401
|
138 goto error;
|
mas01cr@401
|
139 }
|
mas01cr@401
|
140
|
mas01cr@401
|
141 /* can be fsync() if fdatasync() is racily exciting and new */
|
mas01cr@401
|
142 fdatasync(adb->fd);
|
mas01cr@401
|
143 if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
|
mas01cr@401
|
144 goto error;
|
mas01cr@401
|
145 }
|
mas01cr@401
|
146 return 0;
|
mas01cr@401
|
147
|
mas01cr@401
|
148 error:
|
mas01cr@401
|
149 return 1;
|
mas01cr@401
|
150 }
|
mas01cr@425
|
151
|
mas01cr@425
|
/* Return the sum over i in [0, count) of p[i] * q[i], accumulated in
 * index order starting from 0.0.  Returns 0.0 when count is 0.
 * Neither input is written to, so both are taken as pointers to const;
 * p and q may point at the same array. */
static inline double audiodb_dot_product(const double *p, const double *q, size_t count) {
  double result = 0;
  for(size_t i = 0; i < count; i++) {
    result += p[i] * q[i];
  }
  return result;
}
|
mas01cr@426
|
159
|
mas01cr@426
|
160 static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) {
|
mas01cr@426
|
161 while(nvectors--) {
|
mas01cr@426
|
162 double *d1 = d;
|
mas01cr@426
|
163 double *d2 = d;
|
mas01cr@426
|
164 *l++ = audiodb_dot_product(d1, d2, dim);
|
mas01cr@426
|
165 d += dim;
|
mas01cr@426
|
166 }
|
mas01cr@426
|
167 }
|
mas01cr@427
|
168
|
mas01cr@427
|
169 // This is a common pattern in sequence queries: what we are doing is
|
mas01cr@427
|
170 // taking a window of length seqlen over a buffer of length length,
|
mas01cr@427
|
171 // and placing the sum of the elements in that window in the first
|
mas01cr@427
|
172 // element of the window: thus replacing all but the last seqlen
|
mas01cr@427
|
173 // elements in the buffer with the corresponding windowed sum.
|
mas01cr@427
|
/* Replace buffer[i], for each i in [0, length - seqlen], with the sum
 * buffer[i] + ... + buffer[i + seqlen - 1] of the original values.
 * The last seqlen - 1 elements are left as they were.
 *
 * buffer: array of `length` doubles, updated in place
 * length: number of elements in buffer
 * seqlen: window size
 *
 * If seqlen < 1 or length < seqlen there is no complete window and the
 * buffer is left untouched.  (Without this check the loop counters go
 * negative and the loops run far past the end of the buffer.)
 */
static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || length < seqlen) {
    return;
  }

  /* original value of the element that drops out of the next window */
  double leaving = buffer[0];

  /* the first window is summed directly */
  for(int j = 1; j < seqlen; j++) {
    buffer[0] += buffer[j];
  }

  double *ps = buffer + 1;
  for(int remaining = length - seqlen; remaining > 0; remaining--) {
    double current = *ps;
    if(isfinite(leaving)) {
      /* slide: previous window sum, minus the element that left, plus
       * the element that entered */
      *ps = *(ps - 1) - leaving + *(ps + seqlen - 1);
    } else {
      /* subtracting an infinity or NaN would poison the result, so
       * this window is summed from scratch */
      for(int i = 1; i < seqlen; i++) {
        *ps += *(ps + i);
      }
    }
    leaving = current;
    ps++;
  }
}
|
mas01cr@427
|
199
|
mas01cr@427
|
200 // In contrast to audiodb_sequence_sum() above,
|
mas01cr@427
|
201 // audiodb_sequence_sqrt() and audiodb_sequence_average() below are
|
mas01cr@427
|
202 // simple mappers across the sequence.
|
mas01cr@427
|
/* Replace each of the first length - seqlen + 1 elements of buffer
 * with its square root; the remaining elements are left alone.
 *
 * If seqlen < 1 or length < seqlen nothing is done.  (Without this
 * check the element count is negative, or larger than length, and the
 * loop runs outside the buffer.)
 */
static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || length < seqlen) {
    return;
  }
  int nwindows = length - seqlen + 1;
  for(int i = 0; i < nwindows; i++) {
    buffer[i] = sqrt(buffer[i]);
  }
}
|
mas01cr@427
|
210
|
mas01cr@427
|
/* Divide each of the first length - seqlen + 1 elements of buffer by
 * seqlen; the remaining elements are left alone.
 *
 * If seqlen < 1 or length < seqlen nothing is done.  (Without this
 * check the element count is negative, or larger than length, and the
 * loop runs outside the buffer.)
 */
static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || length < seqlen) {
    return;
  }
  int nwindows = length - seqlen + 1;
  for(int i = 0; i < nwindows; i++) {
    buffer[i] /= seqlen;
  }
}
|
mas01cr@430
|
218
|
mas01cr@430
|
219 static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) {
|
mas01cr@430
|
220 std::map<std::string,uint32_t>::iterator it;
|
mas01cr@453
|
221 it = adb->keymap->find(key);
|
mas01cr@453
|
222 if(it == adb->keymap->end()) {
|
mas01cr@430
|
223 return (uint32_t) -1;
|
mas01cr@430
|
224 } else {
|
mas01cr@430
|
225 return (*it).second;
|
mas01cr@430
|
226 }
|
mas01cr@430
|
227 }
|
mas01cr@433
|
228
|
mas01cr@469
|
229 static inline const char *audiodb_index_key(adb_t *adb, uint32_t index) {
|
mas01cr@469
|
230 return (*adb->keys)[index].c_str();
|
mas01cr@469
|
231 }
|
mas01cr@469
|
232
|
mas01cr@458
|
/* Extract the track id from a combined id: the bits of `lshid` above
 * the low `n_point_bits` bits.  When n_point_bits is 32 or more no
 * bits are left for the track id and 0 is returned (shifting a 32-bit
 * value by 32 or more is undefined, so it is not attempted). */
static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return 0;
  }
  return lshid >> n_point_bits;
}
|
mas01cr@458
|
236
|
mas01cr@458
|
/* Extract the position within the track from a combined id: the low
 * `n_point_bits` bits of `lshid`.  When n_point_bits is 32 or more the
 * whole value is position bits and `lshid` is returned unchanged.
 *
 * The mask is built from an unsigned 32-bit one: shifting a signed
 * int 1 left by 31 overflows, and shifting by 32 or more is undefined
 * for any 32-bit operand. */
static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return lshid;
  }
  return lshid & ((((uint32_t) 1) << n_point_bits) - 1);
}
|
mas01cr@458
|
240
|
mas01cr@458
|
/* Build a combined id with `track_id` placed above the low
 * `n_point_bits` bits and `track_pos` OR-ed into the result.
 * `track_pos` is not masked: a value that does not fit in
 * n_point_bits bits overlaps the track id bits, so callers must keep
 * it in range.
 *
 * When n_point_bits is 32 or more there is no room for a track id and
 * `track_pos` alone is returned (shifting a 32-bit value by 32 or more
 * is undefined, so it is not attempted). */
static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return track_pos;
  }
  return (track_id << n_point_bits) | track_pos;
}
|
mas01cr@458
|
244
|
mas01cr@458
|
245 static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) {
|
mas01cr@458
|
246 uint32_t nbits = adb->header->flags >> 28;
|
mas01cr@458
|
247 return (nbits ? nbits : O2_DEFAULT_LSH_N_POINT_BITS);
|
mas01cr@458
|
248 }
|
mas01cr@458
|
249
|
mas01cr@433
|
250 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
|
mas01cr@443
|
251 int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *);
|
mas01cr@461
|
252 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
|
mas01cr@443
|
253 int audiodb_free_datum(adb_datum_t *);
|
mas01cr@461
|
254 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
|
mas01cr@473
|
255 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
|
mas01cr@473
|
256 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
|
mas01cr@473
|
257 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
|
mas01cr@460
|
258 char *audiodb_index_get_name(const char *, double, uint32_t);
|
mas01cr@460
|
259 bool audiodb_index_exists(const char *, double, uint32_t);
|
mas01cr@473
|
260 int audiodb_index_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
|