annotate audioDB-internals.h @ 507:e7fd50483311

Free bits of the datum constructed in audioDB::query. We're not quite safe: error calls between allocation of some of these bits and pieces and their use will cause failure... but not freeing things here is definitely wrong.
author mas01cr
date Tue, 13 Jan 2009 21:37:10 +0000
parents 342822c2d49a
children cc2b97d020b1
rev   line source
mas01cr@498 1 #include "accumulator.h"
mas01cr@498 2
/* Internal carrier used to write the insert code once for two kinds
 * of input.  When inserting we may be handed a "datum", whose members
 * refer to actual numerical data, or a "reference", whose members
 * refer to strings naming the files that contain the numerical data.
 * Most of the work is identical in both cases, so the payload members
 * are deliberately untyped (void *) and the same struct serves as a
 * pun for either.  Used only internally.
 */
struct adb_datum_internal {
  uint32_t nvectors;
  uint32_t dim;
  const char *key;
  void *data;
  void *times;
  void *power;
};

typedef struct adb_datum_internal adb_datum_internal_t;
mas01cr@498 18
/* Bundle of pointers describing a query (equally usable for a single
 * database entry or for a whole database).
 *
 * Members come in pairs.  The *_data member is meant to stay fixed at
 * the start of its buffer so that it can be handed to free() later
 * (FIXME: should these be constified in some way?); the member
 * without the suffix is a mutable cursor recording the "current"
 * position within that buffer.
 *
 * mean_duration points to an array (possibly of a single element)
 * holding the mean duration for each track.
 */
struct adb_qpointers_internal {
  uint32_t nvectors;
  double *l2norm_data;
  double *l2norm;
  double *power_data;
  double *power;
  double *mean_duration;
};

typedef struct adb_qpointers_internal adb_qpointers_internal_t;
mas01cr@498 35
mas01cr@498 36 /* this struct is for maintaining per-query state. We don't want to
mas01cr@498 37 * store this stuff in the adb struct itself, because (a) it doesn't
mas01cr@498 38 * belong there and (b) in principle people might do two queries in
mas01cr@498 39 * parallel using the same adb handle. (b) is in practice a little
mas01cr@498 40 * bit academic because at the moment we're seeking all over the disk
mas01cr@498 41 * using adb->fd, but changing to use pread() might win us
mas01cr@498 42 * threadsafety eventually.
mas01cr@498 43 */
mas01cr@498 44 typedef struct adb_qstate_internal {
mas01cr@498 45 Accumulator *accumulator;
mas01cr@498 46 std::set<std::string> *allowed_keys;
mas01cr@498 47 std::priority_queue<PointPair> *exact_evaluation_queue;
mas01cr@498 48 LSH *lsh;
mas01cr@498 49 } adb_qstate_internal_t;
mas01cr@498 50
mas01cr@498 51 /* The transparent version of the opaque (forward-declared) adb_t; the inline helpers in this header read its members directly. */
mas01cr@498 52 struct adb {
mas01cr@498 53 char *path; /* presumably the pathname of the database file -- TODO confirm against the open code */
mas01cr@498 54 int fd; /* file descriptor the helpers in this header pass to lseek(), write(), fdatasync() and mmap() */
mas01cr@498 55 int flags; /* NOTE(review): not used in this header; looks like the open flags -- confirm */
mas01cr@498 56 adb_header_t *header; /* in-memory header; audiodb_sync_header() writes O2_HEADERSIZE bytes of it at file offset 0 */
mas01cr@498 57 std::vector<std::string> *keys; /* key string for each index; read by audiodb_index_key() */
mas01cr@498 58 std::map<std::string,uint32_t> *keymap; /* key string -> index; searched by audiodb_key_index() */
mas01cr@498 59 std::vector<uint32_t> *track_lengths; /* presumably one length per track -- TODO confirm units */
mas01cr@498 60 std::vector<off_t> *track_offsets; /* presumably one offset per track -- TODO confirm what it is relative to */
mas01cr@498 61 LSH *cached_lsh; /* NOTE(review): not used in this header; name suggests an LSH index kept between queries -- confirm */
mas01cr@498 62 };
mas01cr@498 63
mas01cr@498 64 typedef struct {
mas01cr@498 65 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 66 return strcmp(r1.key, r2.key) < 0;
mas01cr@498 67 }
mas01cr@498 68 } adb_result_key_lt;
mas01cr@498 69
mas01cr@498 70 typedef struct {
mas01cr@498 71 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 72 return r1.qpos < r2.qpos;
mas01cr@498 73 }
mas01cr@498 74 } adb_result_qpos_lt;
mas01cr@498 75
mas01cr@498 76 typedef struct {
mas01cr@498 77 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 78 return r1.dist < r2.dist;
mas01cr@498 79 }
mas01cr@498 80 } adb_result_dist_lt;
mas01cr@498 81
mas01cr@498 82 typedef struct {
mas01cr@498 83 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 84 return r1.dist > r2.dist;
mas01cr@498 85 }
mas01cr@498 86 } adb_result_dist_gt;
mas01cr@498 87
mas01cr@498 88 typedef struct {
mas01cr@498 89 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 90 return ((r1.ipos < r2.ipos) ||
mas01cr@498 91 ((r1.ipos == r2.ipos) &&
mas01cr@498 92 ((r1.qpos < r2.qpos) ||
mas01cr@498 93 ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0)))));
mas01cr@498 94 }
mas01cr@498 95 } adb_result_triple_lt;
mas01cr@498 96
/* Map `length` bytes of the file behind adb->fd, starting at file
 * offset `start`, read-only and shared, then store the mapping's
 * address, cast to `type`, in `var`.  If mmap() fails, `var` is left
 * untouched and control jumps to the label `error`.
 *
 * The expansion relies on two names from the calling scope: a
 * variable `adb` (for adb->fd) and a label `error`.
 *
 * We could go gcc-specific here and use typeof() instead of passing
 * in an explicit type.  Answers on a postcard as to whether that's a
 * good plan or not.
 *
 * The expansion stays a plain brace block (not do/while(0)) so that
 * existing call sites keep compiling whether or not they add a
 * trailing semicolon.  The temporary has a macro-specific name so an
 * argument that happens to be called `tmp` is not captured.
 */
#define mmap_or_goto_error(type, var, start, length) \
  { void *adb_mmap_tmp_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
    if(adb_mmap_tmp_ == MAP_FAILED) { \
      goto error; \
    } \
    (var) = (type) adb_mmap_tmp_; \
  }
mas01cr@498 107
/* Unmap `length` bytes at `table`, but only if `table` is non-null.
 * The result of munmap() is not checked.
 *
 * `table` is evaluated exactly once and both arguments are
 * parenthesised, so expressions are safe to pass.  The expansion
 * stays a plain brace block so existing call sites keep compiling.
 */
#define maybe_munmap(table, length) \
  { void *adb_munmap_tmp_ = (table); \
    if(adb_munmap_tmp_) { \
      munmap(adb_munmap_tmp_, (length)); \
    } \
  }
mas01cr@498 113
/* Write exactly `size` bytes from `buffer` to descriptor `fd`.  Any
 * other outcome of write() -- an error or a short write -- jumps to
 * the label `error`, which must exist in the calling function.
 *
 * Each argument is evaluated exactly once (the original evaluated
 * `size` twice), and the temporary has a macro-specific name so that
 * a caller's variable called `tmp` can be passed as any argument.
 * The expansion stays a plain brace block so existing call sites keep
 * compiling.
 */
#define write_or_goto_error(fd, buffer, size) \
  { ssize_t adb_write_len_ = (size); \
    if(write((fd), (buffer), (size_t) adb_write_len_) != adb_write_len_) { \
      goto error; \
    } \
  }
mas01cr@498 120
/* Read exactly `size` bytes from descriptor `fd` into `buffer`.  Any
 * other outcome of read() -- an error, end of file, or a short read --
 * jumps to the label `error`, which must exist in the calling
 * function.
 *
 * Each argument is evaluated exactly once (the original evaluated
 * `size` twice), and the temporary has a macro-specific name so that
 * a caller's variable called `tmp` can be passed as any argument.
 * The expansion stays a plain brace block so existing call sites keep
 * compiling.
 */
#define read_or_goto_error(fd, buffer, size) \
  { ssize_t adb_read_len_ = (size); \
    if(read((fd), (buffer), (size_t) adb_read_len_) != adb_read_len_) { \
      goto error; \
    } \
  }
mas01cr@498 127
mas01cr@498 128 static inline int audiodb_sync_header(adb_t *adb) {
mas01cr@498 129 off_t pos;
mas01cr@498 130 pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
mas01cr@498 131 if(pos == (off_t) -1) {
mas01cr@498 132 goto error;
mas01cr@498 133 }
mas01cr@498 134 if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
mas01cr@498 135 goto error;
mas01cr@498 136 }
mas01cr@498 137 if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@498 138 goto error;
mas01cr@498 139 }
mas01cr@498 140
mas01cr@498 141 /* can be fsync() if fdatasync() is racily exciting and new */
mas01cr@498 142 fdatasync(adb->fd);
mas01cr@498 143 if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
mas01cr@498 144 goto error;
mas01cr@498 145 }
mas01cr@498 146 return 0;
mas01cr@498 147
mas01cr@498 148 error:
mas01cr@498 149 return 1;
mas01cr@498 150 }
mas01cr@498 151
/* Return the dot product of the first `count` elements of p and q,
 * accumulated in index order starting from 0.  Returns 0 when count
 * is 0.  Neither array is modified.
 */
static inline double audiodb_dot_product(double *p, double *q, size_t count) {
  double sum = 0;
  for(size_t i = 0; i < count; i++) {
    sum += p[i] * q[i];
  }
  return sum;
}
mas01cr@498 159
/* For each of `nvectors` consecutive vectors of `dim` doubles stored
 * back to back in d, store the dot product of the vector with itself
 * (i.e. its squared Euclidean length; no square root is taken here)
 * in the corresponding element of l.  l must have room for nvectors
 * doubles.
 */
static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) {
  for(size_t v = 0; v < nvectors; v++) {
    double *vec = d + v * dim;
    l[v] = audiodb_dot_product(vec, vec, dim);
  }
}
mas01cr@498 168
// This is a common pattern in sequence queries: what we are doing is
// taking a window of length seqlen over a buffer of length length,
// and placing the sum of the elements in that window in the first
// element of the window: thus replacing the first length-seqlen+1
// elements of the buffer (all but the last seqlen-1) with the
// corresponding windowed sum.  The trailing seqlen-1 elements are
// left as they were.
//
// If buffer is null, seqlen is less than 1, or the buffer is shorter
// than one window (length < seqlen), there is no complete window and
// the buffer is left untouched.  Without this check the loop counters
// below would go negative and run off the end of the buffer.
static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) {
  double tmp1, tmp2, *ps;
  int j, w;

  if(!buffer || seqlen < 1 || length < seqlen) {
    return;
  }

  // First window: sum it directly into buffer[0], remembering the
  // original first element so it can be subtracted from the next
  // window.
  tmp1 = *buffer;
  j = 1;
  w = seqlen - 1;
  while(w--) {
    *buffer += buffer[j++];
  }

  // Remaining windows: slide by one, i.e. previous sum, minus the
  // element that left the window (tmp1, its original value), plus the
  // element that entered.
  ps = buffer + 1;
  w = length - seqlen; // +1 - 1
  while(w--) {
    tmp2 = *ps;
    if(isfinite(tmp1)) {
      *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
    } else {
      // The element leaving the window was not finite, so the
      // previous sum cannot be corrected by subtraction; recompute
      // this window from scratch instead.
      for(int i = 1; i < seqlen; i++) {
        *ps += *(ps + i);
      }
    }
    tmp1 = tmp2;
    ps++;
  }
}
mas01cr@498 199
mas01cr@498 200 // In contrast to audiodb_sequence_sum() above,
mas01cr@498 201 // audiodb_sequence_sqrt() and audiodb_sequence_average() below are
mas01cr@498 202 // simple mappers across the sequence.
mas01cr@498 203 static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@498 204 int w = length - seqlen + 1;
mas01cr@498 205 while(w--) {
mas01cr@498 206 *buffer = sqrt(*buffer);
mas01cr@498 207 buffer++;
mas01cr@498 208 }
mas01cr@498 209 }
mas01cr@498 210
// Simple mapper across the sequence: divides each of the first
// length-seqlen+1 elements of buffer (one per complete window of
// seqlen elements) by seqlen.  The trailing seqlen-1 elements are
// left as they were.
//
// If seqlen is less than 1, or the buffer is shorter than one window,
// nothing is touched.  Without this check a negative count would make
// the loop run away, and seqlen == 0 would both divide by zero and
// write past the buffer.
static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) {
  if(seqlen < 1) {
    return;
  }
  int w = length - seqlen + 1;
  while(w-- > 0) {
    *buffer /= seqlen;
    buffer++;
  }
}
mas01cr@498 218
mas01cr@498 219 static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) {
mas01cr@498 220 std::map<std::string,uint32_t>::iterator it;
mas01cr@498 221 it = adb->keymap->find(key);
mas01cr@498 222 if(it == adb->keymap->end()) {
mas01cr@498 223 return (uint32_t) -1;
mas01cr@498 224 } else {
mas01cr@498 225 return (*it).second;
mas01cr@498 226 }
mas01cr@498 227 }
mas01cr@498 228
mas01cr@498 229 static inline const char *audiodb_index_key(adb_t *adb, uint32_t index) {
mas01cr@498 230 return (*adb->keys)[index].c_str();
mas01cr@498 231 }
mas01cr@498 232
/* Extract the track id from a packed id: the bits of lshid above the
 * low n_point_bits bits (this is the inverse of the packing done by
 * audiodb_index_from_trackinfo()).
 *
 * Shifting a 32-bit value by 32 or more is undefined, so when
 * n_point_bits >= 32 (no bits left for a track id) the result is 0.
 */
static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return 0;
  }
  return (lshid >> n_point_bits);
}
mas01cr@498 236
/* Extract the position within a track from a packed id: the low
 * n_point_bits bits of lshid.
 *
 * The mask is built from an unsigned 32-bit one, so that
 * n_point_bits == 31 does not shift into the sign bit of an int.
 * Shifting by 32 or more is undefined, so when n_point_bits >= 32
 * (every bit is a position bit) lshid is returned unchanged.
 */
static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return lshid;
  }
  return (lshid & (((uint32_t) 1 << n_point_bits) - 1));
}
mas01cr@498 240
/* Pack a track id and a position into one 32-bit id: track_id is
 * shifted left by n_point_bits and OR-ed with track_pos.  track_pos
 * is not masked, so the caller must make sure it fits in
 * n_point_bits bits; track_id bits shifted beyond bit 31 are lost.
 *
 * Shifting by 32 or more is undefined, so when n_point_bits >= 32
 * (no bits left for a track id) only track_pos is returned.
 */
static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return track_pos;
  }
  return ((track_id << n_point_bits) | track_pos);
}
mas01cr@498 244
mas01cr@498 245 static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) {
mas01cr@498 246 uint32_t nbits = adb->header->flags >> 28;
mas01cr@498 247 return (nbits ? nbits : O2_DEFAULT_LSH_N_POINT_BITS);
mas01cr@498 248 }
mas01cr@498 249
/* Prototypes for internal routines that are only declared in this
 * header; their definitions are not visible here, so parameter
 * meanings and return conventions must be checked against the
 * defining source files.  NOTE(review): the int-returning routines
 * presumably follow the 0-on-success convention used by
 * audiodb_sync_header() -- confirm before relying on it. */
mas01cr@498 250 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
mas01cr@498 251 int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *);
mas01cr@498 252 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
mas01cr@498 253 int audiodb_free_datum(adb_datum_t *);
mas01cr@498 254 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
mas01cr@498 255 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
mas01cr@498 256 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
mas01cr@498 257 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
mas01cr@498 258 char *audiodb_index_get_name(const char *, double, uint32_t);
mas01cr@498 259 bool audiodb_index_exists(const char *, double, uint32_t);
mas01cr@498 260 int audiodb_index_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);