annotate audioDB-internals.h @ 473:b2fd8113d8bc api-inversion

const declarations for some API arguments. This should make it slightly clearer whose responsibility (the user's) it is to manage the memory pointed to by the corresponding arguments. Suggested by Chris Cannam.
author mas01cr
date Tue, 06 Jan 2009 16:27:01 +0000
parents d3afc91d205d
children
rev   line source
mas01cr@457 1 #include "accumulator.h"
mas01cr@457 2
mas01cr@457 3 /* this struct is for writing polymorphic routines as puns. When
mas01cr@457 4 * inserting, we might have a "datum" (with actual numerical data) or
mas01cr@457 5 * a "reference" (with strings denoting pathnames containing numerical
mas01cr@457 6 * data), but most of the operations are the same. This struct, used
mas01cr@457 7 * only internally, allows us to write the main body of the insert
mas01cr@457 8 * code only once.
mas01cr@457 9 */
mas01cr@408 10 typedef struct adb_datum_internal {
mas01cr@408 11 uint32_t nvectors; /* NOTE(review): presumably the number of vectors in the entry -- confirm */
mas01cr@408 12 uint32_t dim; /* NOTE(review): presumably the number of elements per vector -- confirm */
mas01cr@408 13 const char *key; /* NOTE(review): presumably the key naming the entry -- confirm */
mas01cr@408 14 void *data; /* untyped: per the comment above, either numerical data or a pathname string */
mas01cr@408 15 void *times; /* untyped for the same datum/reference pun as data */
mas01cr@408 16 void *power; /* untyped for the same datum/reference pun as data */
mas01cr@408 17 } adb_datum_internal_t;
mas01cr@408 18
mas01cr@463 19 /* this struct is to collect together a bunch of information about a
mas01cr@463 20 * query (or, in fact, a single database entry, or even a whole
mas01cr@463 21 * database). The _data pointers are immutable (hey, FIXME: should
mas01cr@463 22 * they be constified in some way?) so that free() can work on them
mas01cr@463 23 * later, while the ones without the suffix are mutable to maintain
mas01cr@463 24 * the "current" position in some way. mean_duration points to a
mas01cr@463 25 * (possibly single-element) array of mean durations for each track.
mas01cr@463 26 */
mas01cr@463 27 typedef struct adb_qpointers_internal {
mas01cr@463 28 uint32_t nvectors; /* NOTE(review): presumably the number of vectors described -- confirm */
mas01cr@463 29 double *l2norm_data; /* base pointer: left unchanged so it can later be handed to free() */
mas01cr@463 30 double *l2norm; /* movable cursor tracking the "current" position */
mas01cr@463 31 double *power_data; /* base pointer: left unchanged so it can later be handed to free() */
mas01cr@463 32 double *power; /* movable cursor tracking the "current" position */
mas01cr@463 33 double *mean_duration; /* array (possibly one element) of per-track mean durations */
mas01cr@463 34 } adb_qpointers_internal_t;
mas01cr@463 35
mas01cr@457 36 /* this struct is for maintaining per-query state. We don't want to
mas01cr@457 37 * store this stuff in the adb struct itself, because (a) it doesn't
mas01cr@457 38 * belong there and (b) in principle people might do two queries in
mas01cr@457 39 * parallel using the same adb handle. (b) is in practice a little
mas01cr@457 40 * bit academic because at the moment we're seeking all over the disk
mas01cr@457 41 * using adb->fd, but changing to use pread() might win us
mas01cr@457 42 * threadsafety eventually.
mas01cr@457 43 */
mas01cr@468 44 typedef struct adb_qstate_internal {
mas01cr@468 45 Accumulator *accumulator; /* NOTE(review): presumably gathers this query's results -- confirm against accumulator.h */
mas01cr@468 46 std::set<std::string> *allowed_keys; /* NOTE(review): presumably the keys a result is allowed to come from -- confirm */
mas01cr@468 47 std::priority_queue<PointPair> *exact_evaluation_queue; /* NOTE(review): presumably candidates awaiting exact evaluation -- confirm */
mas01cr@468 48 LSH *lsh; /* NOTE(review): presumably the LSH index in use for this query -- confirm */
mas01cr@468 49 } adb_qstate_internal_t;
mas01cr@457 50
mas01cr@468 51 /* the transparent version of the opaque (forward-declared) adb_t. */
mas01cr@402 52 struct adb {
mas01cr@402 53 char *path; /* NOTE(review): presumably the pathname of the database file -- confirm */
mas01cr@402 54 int fd; /* descriptor used by audiodb_sync_header() and mmap_or_goto_error() below */
mas01cr@402 55 int flags; /* NOTE(review): not used in this header; presumably open flags -- confirm */
mas01cr@402 56 adb_header_t *header; /* in-memory header; audiodb_sync_header() writes it at file offset 0 */
mas01cr@453 57 std::vector<std::string> *keys; /* index -> key, as read by audiodb_index_key() */
mas01cr@453 58 std::map<std::string,uint32_t> *keymap; /* key -> index, as read by audiodb_key_index() */
mas01cr@432 59 std::vector<uint32_t> *track_lengths; /* NOTE(review): presumably one length per track -- confirm */
mas01cr@442 60 std::vector<off_t> *track_offsets; /* NOTE(review): presumably one file offset per track -- confirm */
mas01cr@465 61 LSH *cached_lsh; /* NOTE(review): presumably an LSH index kept between queries -- confirm */
mas01cr@402 62 };
mas01cr@402 63
mas01cr@416 64 typedef struct {
mas01cr@416 65 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 66 return strcmp(r1.key, r2.key) < 0;
mas01cr@416 67 }
mas01cr@416 68 } adb_result_key_lt;
mas01cr@416 69
mas01cr@416 70 typedef struct {
mas01cr@416 71 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 72 return r1.qpos < r2.qpos;
mas01cr@416 73 }
mas01cr@416 74 } adb_result_qpos_lt;
mas01cr@416 75
mas01cr@416 76 typedef struct {
mas01cr@416 77 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 78 return r1.dist < r2.dist;
mas01cr@416 79 }
mas01cr@416 80 } adb_result_dist_lt;
mas01cr@416 81
mas01cr@416 82 typedef struct {
mas01cr@416 83 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 84 return r1.dist > r2.dist;
mas01cr@416 85 }
mas01cr@416 86 } adb_result_dist_gt;
mas01cr@416 87
mas01cr@416 88 typedef struct {
mas01cr@416 89 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 90 return ((r1.ipos < r2.ipos) ||
mas01cr@416 91 ((r1.ipos == r2.ipos) &&
mas01cr@416 92 ((r1.qpos < r2.qpos) ||
mas01cr@416 93 ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0)))));
mas01cr@416 94 }
mas01cr@416 95 } adb_result_triple_lt;
mas01cr@416 96
/* We could go gcc-specific here and use typeof() instead of passing
 * in an explicit type. Answers on a postcard as to whether that's a
 * good plan or not.
 *
 * Maps `length` bytes of adb->fd, read-only and shared, starting at
 * file offset `start`, and stores the mapping in `var` cast to
 * `type`.  On failure control transfers to the label `error`.
 *
 * The expansion relies on two names from the calling scope: a
 * variable `adb` and a label `error`.  It is deliberately a bare
 * braced block, as before, so existing call sites keep compiling
 * whether or not they follow the invocation with a semicolon.  The
 * temporary has a macro-specific name so that it cannot capture an
 * identifier appearing in the arguments.
 */
#define mmap_or_goto_error(type, var, start, length) \
  { void *adb_mmap_result_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
    if(adb_mmap_result_ == MAP_FAILED) { \
      goto error; \
    } \
    (var) = (type) adb_mmap_result_; \
  }
mas01cr@401 107
/* Unmaps `length` bytes at `table`, but only when `table` is
 * non-null.  The result of munmap() is not examined.  Note that
 * `table` is evaluated twice.
 */
#define maybe_munmap(table, length) \
  { if((table) != 0) { \
      munmap((table), (length)); \
    } \
  }
mas01cr@401 113
/* Writes `size` bytes from `buffer` to `fd` with a single write()
 * call and jumps to the label `error` (which must exist in the
 * calling function) unless exactly `size` bytes were written; a
 * short write therefore counts as a failure.
 *
 * `size` is evaluated exactly once, and the temporary has a
 * macro-specific name so that it cannot capture an identifier used
 * in the arguments.  The expansion stays a bare braced block so that
 * existing call sites compile unchanged.
 */
#define write_or_goto_error(fd, buffer, size) \
  { ssize_t adb_write_expected_ = (size); \
    if(write((fd), (buffer), adb_write_expected_) != adb_write_expected_) { \
      goto error; \
    } \
  }
mas01cr@410 120
/* Reads `size` bytes from `fd` into `buffer` with a single read()
 * call and jumps to the label `error` (which must exist in the
 * calling function) unless exactly `size` bytes were read; a short
 * read, including end of file, therefore counts as a failure.
 *
 * `size` is evaluated exactly once, and the temporary has a
 * macro-specific name so that it cannot capture an identifier used
 * in the arguments.  The expansion stays a bare braced block so that
 * existing call sites compile unchanged.
 */
#define read_or_goto_error(fd, buffer, size) \
  { ssize_t adb_read_expected_ = (size); \
    if(read((fd), (buffer), adb_read_expected_) != adb_read_expected_) { \
      goto error; \
    } \
  }
mas01cr@410 127
mas01cr@401 128 static inline int audiodb_sync_header(adb_t *adb) {
mas01cr@401 129 off_t pos;
mas01cr@401 130 pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
mas01cr@401 131 if(pos == (off_t) -1) {
mas01cr@401 132 goto error;
mas01cr@401 133 }
mas01cr@401 134 if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
mas01cr@401 135 goto error;
mas01cr@401 136 }
mas01cr@401 137 if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@401 138 goto error;
mas01cr@401 139 }
mas01cr@401 140
mas01cr@401 141 /* can be fsync() if fdatasync() is racily exciting and new */
mas01cr@401 142 fdatasync(adb->fd);
mas01cr@401 143 if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
mas01cr@401 144 goto error;
mas01cr@401 145 }
mas01cr@401 146 return 0;
mas01cr@401 147
mas01cr@401 148 error:
mas01cr@401 149 return 1;
mas01cr@401 150 }
mas01cr@425 151
/* Returns the dot product of the `count`-element vectors p and q,
 * accumulating the products in index order starting from zero.
 * Returns 0 when count is 0.
 *
 * Neither input is written to, so both are const-qualified; callers
 * holding plain `double *` data are unaffected.
 */
static inline double audiodb_dot_product(const double *p, const double *q, size_t count) {
  double result = 0;
  for(size_t i = 0; i < count; i++) {
    result += p[i] * q[i];
  }
  return result;
}
mas01cr@426 159
mas01cr@426 160 static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) {
mas01cr@426 161 while(nvectors--) {
mas01cr@426 162 double *d1 = d;
mas01cr@426 163 double *d2 = d;
mas01cr@426 164 *l++ = audiodb_dot_product(d1, d2, dim);
mas01cr@426 165 d += dim;
mas01cr@426 166 }
mas01cr@426 167 }
mas01cr@427 168
// This is a common pattern in sequence queries: what we are doing is
// taking a window of length seqlen over a buffer of length length,
// and placing the sum of the elements in that window in the first
// element of the window: thus replacing all but the last seqlen - 1
// elements in the buffer with the corresponding windowed sum.
//
// The first window is summed directly.  Each later window is derived
// from its predecessor by subtracting the element that has left the
// window and adding the one that has entered it.  When the departing
// element is not finite that subtraction cannot recover the sum, so
// the window is summed from scratch instead.
//
// If seqlen is less than 1, or the buffer is shorter than one window
// (length < seqlen), there is nothing to compute and the buffer is
// left untouched.
static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) {
  double leaving, next_leaving, *ps;
  int w;

  if(seqlen < 1 || length < seqlen) {
    return;
  }

  /* original value of the element about to drop out of the window */
  leaving = buffer[0];
  for(w = 1; w < seqlen; w++) {
    buffer[0] += buffer[w];
  }

  ps = buffer + 1;
  for(w = length - seqlen; w > 0; w--, ps++) {
    /* remember the raw value before it is overwritten by a sum */
    next_leaving = *ps;
    if(isfinite(leaving)) {
      *ps = *(ps - 1) - leaving + *(ps + seqlen - 1);
    } else {
      for(int i = 1; i < seqlen; i++) {
        *ps += *(ps + i);
      }
    }
    leaving = next_leaving;
  }
}
mas01cr@427 199
// In contrast to audiodb_sequence_sum() above,
// audiodb_sequence_sqrt() and audiodb_sequence_average() below are
// simple mappers across the sequence.
//
// audiodb_sequence_sqrt() replaces each of the first
// length - seqlen + 1 elements of buffer (one per window position)
// with its square root.  If seqlen is less than 1 or length is less
// than seqlen there are no window positions and the buffer is left
// untouched.
static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || length < seqlen) {
    return;
  }
  for(int i = 0; i <= length - seqlen; i++) {
    buffer[i] = sqrt(buffer[i]);
  }
}
mas01cr@427 210
// Divides each of the first length - seqlen + 1 elements of buffer
// (one per window position) by seqlen, turning windowed sums into
// windowed averages.  If seqlen is less than 1 or length is less
// than seqlen there are no window positions and the buffer is left
// untouched; in particular nothing is ever divided by zero.
static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || length < seqlen) {
    return;
  }
  for(int i = 0; i <= length - seqlen; i++) {
    buffer[i] /= seqlen;
  }
}
mas01cr@430 218
mas01cr@430 219 static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) {
mas01cr@430 220 std::map<std::string,uint32_t>::iterator it;
mas01cr@453 221 it = adb->keymap->find(key);
mas01cr@453 222 if(it == adb->keymap->end()) {
mas01cr@430 223 return (uint32_t) -1;
mas01cr@430 224 } else {
mas01cr@430 225 return (*it).second;
mas01cr@430 226 }
mas01cr@430 227 }
mas01cr@433 228
mas01cr@469 229 static inline const char *audiodb_index_key(adb_t *adb, uint32_t index) {
mas01cr@469 230 return (*adb->keys)[index].c_str();
mas01cr@469 231 }
mas01cr@469 232
/* Extracts the track id from a packed LSH id: the bits of lshid
 * above the low n_point_bits bits.  n_point_bits is used directly
 * as a shift count and is not validated.
 */
static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) {
  const uint32_t track_id = lshid >> n_point_bits;
  return track_id;
}
mas01cr@458 236
/* Extracts the position within the track from a packed LSH id: the
 * low n_point_bits bits of lshid.
 *
 * The mask is built in uint32_t so that the shift is well defined
 * for every n_point_bits below 32 (shifting the int constant 1 into
 * or past its sign bit is not).  For n_point_bits of 32 or more
 * every bit of lshid belongs to the position, so lshid is returned
 * unchanged.
 */
static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return lshid;
  }
  return lshid & (((uint32_t) 1 << n_point_bits) - 1);
}
mas01cr@458 240
/* Packs a track id and a position into a single LSH id: track_id is
 * shifted left by n_point_bits and OR-ed with track_pos.  track_pos
 * is not masked, so it must already fit in n_point_bits bits if it
 * is not to overlap the track id.
 */
static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) {
  const uint32_t high = track_id << n_point_bits;
  return high | track_pos;
}
mas01cr@458 244
mas01cr@458 245 static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) {
mas01cr@458 246 uint32_t nbits = adb->header->flags >> 28;
mas01cr@458 247 return (nbits ? nbits : O2_DEFAULT_LSH_N_POINT_BITS);
mas01cr@458 248 }
mas01cr@458 249
/* Prototypes for internal routines; no definitions for them appear
 * in this header.  NOTE(review): parameters are unnamed here, so
 * their meaning has to be read from the definitions. */
mas01cr@433 250 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
mas01cr@443 251 int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *);
mas01cr@461 252 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
mas01cr@443 253 int audiodb_free_datum(adb_datum_t *);
mas01cr@461 254 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
mas01cr@473 255 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
mas01cr@473 256 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
mas01cr@473 257 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
mas01cr@460 258 char *audiodb_index_get_name(const char *, double, uint32_t);
mas01cr@460 259 bool audiodb_index_exists(const char *, double, uint32_t);
mas01cr@473 260 int audiodb_index_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);